This is a R Markdown file to accompany the mansucript: “Rumen microbial composition in Holstein and Jersey cows is different under same dietary condition and is not affected by sampling method” by Paz et al. 2016 in AEM. It was written in R markdown and converted to html using the R knitr package. This enables us to embed the results of our analyses directly into the text to allow for a reproducible data analysis pipeline. A github repository is available.
The analysis from Paz et al., 2016 can be recreated by using the associated R Markdown file. This is setup to be ran on Mac OS X and was initially performed with 4 GB RAM. No root access is neeeded. Analyses should also work in a linux environment as well if the linux versions of USEARCH and anaconda package manager are used.
Due to licensing constraints, USEARCH could not be included in the setup. To obtain a download link, go to the USEARCH download page and select version USEARCH v8.1.1756 for Mac OSX. A link (expires after 30 days) will be sent to the provided email. Use the link as an argument for shell script below.
Simply download the bash script from the github repository and run it (provide the link to download your licensed USEARCH version as an argument for setup.sh):
Anaconda is downloaded and prompts you during installataion of the packages above. The prompts are as follows:
To convert the R markdown to html use the command: render(“dairy_breeds.Rmd”). To start a R session and run the workflow, use these commands from within the direcotry you initiated installation:
The following packages and knitR settings were used to compile this Markdown:
install.packages("ggplot2", repos='http://cran.us.r-project.org')
##
## The downloaded source packages are in
## '/private/var/folders/6s/s0b7j4y14kz7fbsq66bxtktm0000gn/T/Rtmp8Ofxug/downloaded_packages'
## Updating HTML index of packages in '.Library'
## Making 'packages.html' ...
## done
install.packages("matrixStats", repos='http://cran.us.r-project.org')
##
## The downloaded source packages are in
## '/private/var/folders/6s/s0b7j4y14kz7fbsq66bxtktm0000gn/T/Rtmp8Ofxug/downloaded_packages'
## Updating HTML index of packages in '.Library'
## Making 'packages.html' ...
## done
install.packages("plyr", repos='http://cran.us.r-project.org')
##
## The downloaded source packages are in
## '/private/var/folders/6s/s0b7j4y14kz7fbsq66bxtktm0000gn/T/Rtmp8Ofxug/downloaded_packages'
## Updating HTML index of packages in '.Library'
## Making 'packages.html' ...
## done
install.packages("tidyr", repos='http://cran.us.r-project.org')
##
## The downloaded source packages are in
## '/private/var/folders/6s/s0b7j4y14kz7fbsq66bxtktm0000gn/T/Rtmp8Ofxug/downloaded_packages'
## Updating HTML index of packages in '.Library'
## Making 'packages.html' ...
## done
install.packages("biom", repos='http://cran.us.r-project.org')
##
## The downloaded source packages are in
## '/private/var/folders/6s/s0b7j4y14kz7fbsq66bxtktm0000gn/T/Rtmp8Ofxug/downloaded_packages'
## Updating HTML index of packages in '.Library'
## Making 'packages.html' ...
## done
install.packages("gplots", repos='http://cran.us.r-project.org')
##
## The downloaded source packages are in
## '/private/var/folders/6s/s0b7j4y14kz7fbsq66bxtktm0000gn/T/Rtmp8Ofxug/downloaded_packages'
## Updating HTML index of packages in '.Library'
## Making 'packages.html' ...
## done
install.packages("RColorBrewer", repos='http://cran.us.r-project.org')
##
## The downloaded source packages are in
## '/private/var/folders/6s/s0b7j4y14kz7fbsq66bxtktm0000gn/T/Rtmp8Ofxug/downloaded_packages'
## Updating HTML index of packages in '.Library'
## Making 'packages.html' ...
## done
install.packages("vegan", repos='http://cran.us.r-project.org')
##
## The downloaded source packages are in
## '/private/var/folders/6s/s0b7j4y14kz7fbsq66bxtktm0000gn/T/Rtmp8Ofxug/downloaded_packages'
## Updating HTML index of packages in '.Library'
## Making 'packages.html' ...
## done
install.packages("mvtnorm", repos='http://cran.us.r-project.org')
##
## The downloaded source packages are in
## '/private/var/folders/6s/s0b7j4y14kz7fbsq66bxtktm0000gn/T/Rtmp8Ofxug/downloaded_packages'
## Updating HTML index of packages in '.Library'
## Making 'packages.html' ...
## done
install.packages("modeltools", repos='http://cran.us.r-project.org')
##
## The downloaded source packages are in
## '/private/var/folders/6s/s0b7j4y14kz7fbsq66bxtktm0000gn/T/Rtmp8Ofxug/downloaded_packages'
## Updating HTML index of packages in '.Library'
## Making 'packages.html' ...
## done
install.packages("coin", repos='http://cran.us.r-project.org')
##
## The downloaded source packages are in
## '/private/var/folders/6s/s0b7j4y14kz7fbsq66bxtktm0000gn/T/Rtmp8Ofxug/downloaded_packages'
## Updating HTML index of packages in '.Library'
## Making 'packages.html' ...
## done
sessionInfo()
## R version 3.2.2 (2015-08-14)
## Platform: x86_64-apple-darwin11.4.2 (64-bit)
## Running under: OS X 10.10.5 (Yosemite)
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] knitr_1.12.3 rmarkdown_0.9.5 BiocInstaller_1.20.1
##
## loaded via a namespace (and not attached):
## [1] magrittr_1.5 formatR_1.2.1 htmltools_0.3 tools_3.2.2 yaml_2.1.13
## [6] stringi_1.0-1 stringr_1.0.0 digest_0.6.9 evaluate_0.8
perm = 1e4
opts_chunk$set("tidy" = TRUE)
opts_chunk$set("echo" = TRUE)
opts_chunk$set("eval" = TRUE)
opts_chunk$set("warning" = FALSE)
opts_chunk$set("cache" = TRUE)
Download the file containing the dry matter intake (kg/d) and milk yield (kg/d) across cows.
wget https://raw.githubusercontent.com/enriquepaz/2016_Paz_et_al_Dairy_Breeds/master/production_measures.txt
## --2016-03-02 06:06:16-- https://raw.githubusercontent.com/enriquepaz/2016_Paz_et_al_Dairy_Breeds/master/production_measures.txt
## Resolving raw.githubusercontent.com... 23.235.44.133
## Connecting to raw.githubusercontent.com|23.235.44.133|:443... connected.
## HTTP request sent, awaiting response... 200 OK
## Length: 307 [text/plain]
## Saving to: 'production_measures.txt'
##
## 0K 100% 48.8M=0s
##
## 2016-03-02 06:06:17 (48.8 MB/s) - 'production_measures.txt' saved [307/307]
Generate bar chart of lactation measures for Holstein and Jersey breeds
library(tidyr)
library(plyr)
library(ggplot2)
production_wide <- read.table("production_measures.txt", header = T, sep = "\t")
production_long <- gather(production_wide, Measure, kg_d, DMI:FE)
data_summary <- ddply(production_long, c("Breed", "Measure"), summarise, N = length(kg_d),
mean = mean(kg_d), sd = sd(kg_d), se = sd/sqrt(N))
production_summary <- subset(data_summary, Measure != "FE")
efficiency_summary <- subset(data_summary, Measure == "FE")
# Error bars represent standard error of the mean
production_plot <- ggplot(production_summary, aes(x = Measure, y = mean, fill = Breed)) +
ylab("kg/d") + scale_fill_brewer("Paired") + geom_bar(position = position_dodge(),
stat = "identity", color = "black", size = 0.5, width = 0.9) + geom_errorbar(aes(ymin = mean -
se, ymax = mean + se), size = 0.5, width = 0.3, position = position_dodge(0.9)) +
theme(legend.title = element_blank(), axis.text = element_text(color = "black",
size = 12, face = "bold"), axis.title = element_text(color = "black",
size = 12, face = "bold"), legend.text = element_text(color = "black",
size = 10, face = "bold")) + scale_x_discrete(breaks = c("DMI", "MY"),
labels = c("Dry Matter Intake", "Millk Yield"))
efficiency_plot <- ggplot(efficiency_summary, aes(x = Measure, y = mean, fill = Breed)) +
xlab("Feed Efficiency") + ylab("kg milk/kg dry matter intake") + scale_fill_brewer("Paired") +
geom_bar(position = position_dodge(), stat = "identity", color = "black",
size = 0.5, width = 0.4) + geom_errorbar(aes(ymin = mean - se, ymax = mean +
se), size = 0.5, width = 0.1, position = position_dodge(0.4)) + theme(legend.title = element_blank(),
axis.text.x = element_blank(), axis.ticks.x = element_blank(), axis.text = element_text(color = "black",
size = 12, face = "bold"), axis.title = element_text(color = "black",
size = 12, face = "bold"), legend.text = element_text(color = "black",
size = 10, face = "bold"))
multiplot <- function(..., plotlist = NULL, file, cols = 1, layout = NULL) {
library(grid)
plots <- c(list(...), plotlist)
numPlots = length(plots)
if (is.null(layout)) {
layout <- matrix(seq(1, cols * ceiling(numPlots/cols)), ncol = cols,
nrow = ceiling(numPlots/cols))
}
if (numPlots == 1) {
print(plots[[1]])
} else {
grid.newpage()
pushViewport(viewport(layout = grid.layout(nrow(layout), ncol(layout))))
for (i in 1:numPlots) {
matchidx <- as.data.frame(which(layout == i, arr.ind = TRUE))
print(plots[[i]], vp = viewport(layout.pos.row = matchidx$row, layout.pos.col = matchidx$col))
}
}
}
png("FigS1.png", units = "in", height = 4, width = 12, res = 300)
multiplot(production_plot, efficiency_plot, cols = 2)
dev.off()
## quartz_off_screen
## 2
pdf("FigS1.pdf", height = 6, width = 12)
multiplot(production_plot, efficiency_plot, cols = 2)
dev.off()
## quartz_off_screen
## 2
library(car)
cow_measures <- read.table("production_measures.txt", header = T, sep = "\t")
# Levene test Ho: Variances of breeds dry matter intake are equal
leveneTest(cow_measures$DMI ~ cow_measures$Breed)
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 1 0.5255 0.492
## 7
# T test Ho: mean DMI Holstein = mean DMI Jersey Two-sided t test default
# options used: mu = 0, alt = 'two.sided', conf = 0.95, paired = F
t.test(cow_measures$DMI ~ cow_measures$Breed, var.eq = T)
##
## Two Sample t-test
##
## data: cow_measures$DMI by cow_measures$Breed
## t = 6.5245, df = 7, p-value = 0.0003265
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 2.929666 6.260334
## sample estimates:
## mean in group Holstein mean in group Jersey
## 23.820 19.225
# Levene test Ho: Variances of breeds milk yield are equal
leveneTest(cow_measures$MY ~ cow_measures$Breed)
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 1 0.962 0.3594
## 7
# T test Ho: mean MY Holstein = mean MY Jersey Two-sided t test default
# options used: mu = 0, alt = 'two.sided', conf = 0.95, paired = F
t.test(cow_measures$MY ~ cow_measures$Breed, var.eq = T)
##
## Two Sample t-test
##
## data: cow_measures$MY by cow_measures$Breed
## t = 3.3752, df = 7, p-value = 0.01184
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 4.420873 25.109127
## sample estimates:
## mean in group Holstein mean in group Jersey
## 38.540 23.775
# Levene test Ho: Variances of breeds feed efficiency are equal
leveneTest(cow_measures$FE ~ cow_measures$Breed)
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 1 0.3437 0.5761
## 7
# T test Ho: mean FE Holstein = mean FE Jersey Two-sided t test default
# options used: mu = 0, alt = 'two.sided', conf = 0.95, paired = F
t.test(cow_measures$FE ~ cow_measures$Breed, var.eq = T)
##
## Two Sample t-test
##
## data: cow_measures$FE by cow_measures$Breed
## t = 2.334, df = 7, p-value = 0.0523
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.004860513 0.746860513
## sample estimates:
## mean in group Holstein mean in group Jersey
## 1.606 1.235
Sequences were downloaded from the Torrent server in a fastq file (seqs.fastq) which was used to generate a fasta file (seqs.fna). Initial quality control steps were performed using the Torrent Suite Software (refer to manuscript)
convert_fastaqual_fastq.py -c fastq_to_fastaqual –f seqs.fastq –o fastaqual
Because of file size limit in GitHub, the previously described seqs.fastq and seqs.fna files were not uploaded.
Demultiplex the sequences in the library using the mapping file.
split_libraries.py -f seqs.fna -b variable_length -l 0 -L 1000 -x -M 1 -m mappingfile.txt -o seqs_demultiplexed
The seqs_demultiplexed.fna generated is provided. Remove primer and subsequent sequence.
wget https://raw.githubusercontent.com/enriquepaz/2016_Paz_et_al_Dairy_Breeds/master/seqs_demultiplexed.fna.tar.gz
wget https://raw.githubusercontent.com/enriquepaz/2016_Paz_et_al_Dairy_Breeds/master/mappingfile.txt
tar -zxvf seqs_demultiplexed.fna.tar.gz
truncate_reverse_primer.py -f seqs_demultiplexed.fna -m mappingfile.txt -z truncate_only -M 2 -o seqs_revprimer_truncated
## --2016-03-02 06:06:24-- https://raw.githubusercontent.com/enriquepaz/2016_Paz_et_al_Dairy_Breeds/master/seqs_demultiplexed.fna.tar.gz
## Resolving raw.githubusercontent.com... 23.235.44.133
## Connecting to raw.githubusercontent.com|23.235.44.133|:443... connected.
## HTTP request sent, awaiting response... 200 OK
## Length: 13113749 (13M) [application/octet-stream]
## Saving to: 'seqs_demultiplexed.fna.tar.gz'
##
## 0K .......... .......... .......... .......... .......... 0% 601K 21s
## 50K .......... .......... .......... .......... .......... 0% 1.38M 15s
## 100K .......... .......... .......... .......... .......... 1% 2.16M 12s
## 150K .......... .......... .......... .......... .......... 1% 2.40M 10s
## 200K .......... .......... .......... .......... .......... 1% 2.11M 9s
## 250K .......... .......... .......... .......... .......... 2% 1019K 10s
## 300K .......... .......... .......... .......... .......... 2% 3.52M 9s
## 350K .......... .......... .......... .......... .......... 3% 3.20M 8s
## 400K .......... .......... .......... .......... .......... 3% 2.29M 8s
## 450K .......... .......... .......... .......... .......... 3% 1.83M 8s
## 500K .......... .......... .......... .......... .......... 4% 2.14M 7s
## 550K .......... .......... .......... .......... .......... 4% 2.45M 7s
## 600K .......... .......... .......... .......... .......... 5% 2.30M 7s
## 650K .......... .......... .......... .......... .......... 5% 2.18M 7s
## 700K .......... .......... .......... .......... .......... 5% 1.66M 7s
## 750K .......... .......... .......... .......... .......... 6% 3.06M 7s
## 800K .......... .......... .......... .......... .......... 6% 1.68M 7s
## 850K .......... .......... .......... .......... .......... 7% 2.16M 7s
## 900K .......... .......... .......... .......... .......... 7% 1.83M 7s
## 950K .......... .......... .......... .......... .......... 7% 2.31M 6s
## 1000K .......... .......... .......... .......... .......... 8% 2.46M 6s
## 1050K .......... .......... .......... .......... .......... 8% 2.09M 6s
## 1100K .......... .......... .......... .......... .......... 8% 2.31M 6s
## 1150K .......... .......... .......... .......... .......... 9% 1.82M 6s
## 1200K .......... .......... .......... .......... .......... 9% 1.79M 6s
## 1250K .......... .......... .......... .......... .......... 10% 2.40M 6s
## 1300K .......... .......... .......... .......... .......... 10% 2.15M 6s
## 1350K .......... .......... .......... .......... .......... 10% 1.85M 6s
## 1400K .......... .......... .......... .......... .......... 11% 2.41M 6s
## 1450K .......... .......... .......... .......... .......... 11% 2.31M 6s
## 1500K .......... .......... .......... .......... .......... 12% 2.19M 6s
## 1550K .......... .......... .......... .......... .......... 12% 1.81M 6s
## 1600K .......... .......... .......... .......... .......... 12% 1.79M 6s
## 1650K .......... .......... .......... .......... .......... 13% 2.16M 6s
## 1700K .......... .......... .......... .......... .......... 13% 2.46M 6s
## 1750K .......... .......... .......... .......... .......... 14% 2.18M 6s
## 1800K .......... .......... .......... .......... .......... 14% 1.79M 6s
## 1850K .......... .......... .......... .......... .......... 14% 2.19M 6s
## 1900K .......... .......... .......... .......... .......... 15% 2.77M 5s
## 1950K .......... .......... .......... .......... .......... 15% 2.12M 5s
## 2000K .......... .......... .......... .......... .......... 16% 1.34M 5s
## 2050K .......... .......... .......... .......... .......... 16% 2.91M 5s
## 2100K .......... .......... .......... .......... .......... 16% 2.33M 5s
## 2150K .......... .......... .......... .......... .......... 17% 2.10M 5s
## 2200K .......... .......... .......... .......... .......... 17% 1.88M 5s
## 2250K .......... .......... .......... .......... .......... 17% 2.36M 5s
## 2300K .......... .......... .......... .......... .......... 18% 2.31M 5s
## 2350K .......... .......... .......... .......... .......... 18% 2.31M 5s
## 2400K .......... .......... .......... .......... .......... 19% 1.22M 5s
## 2450K .......... .......... .......... .......... .......... 19% 2.40M 5s
## 2500K .......... .......... .......... .......... .......... 19% 2.81M 5s
## 2550K .......... .......... .......... .......... .......... 20% 2.39M 5s
## 2600K .......... .......... .......... .......... .......... 20% 2.20M 5s
## 2650K .......... .......... .......... .......... .......... 21% 2.02M 5s
## 2700K .......... .......... .......... .......... .......... 21% 2.13M 5s
## 2750K .......... .......... .......... .......... .......... 21% 2.45M 5s
## 2800K .......... .......... .......... .......... .......... 22% 1.55M 5s
## 2850K .......... .......... .......... .......... .......... 22% 1.90M 5s
## 2900K .......... .......... .......... .......... .......... 23% 2.08M 5s
## 2950K .......... .......... .......... .......... .......... 23% 3.01M 5s
## 3000K .......... .......... .......... .......... .......... 23% 2.17M 5s
## 3050K .......... .......... .......... .......... .......... 24% 2.14M 5s
## 3100K .......... .......... .......... .......... .......... 24% 1.61M 5s
## 3150K .......... .......... .......... .......... .......... 24% 2.90M 5s
## 3200K .......... .......... .......... .......... .......... 25% 1.78M 5s
## 3250K .......... .......... .......... .......... .......... 25% 2.04M 5s
## 3300K .......... .......... .......... .......... .......... 26% 2.09M 5s
## 3350K .......... .......... .......... .......... .......... 26% 2.10M 5s
## 3400K .......... .......... .......... .......... .......... 26% 2.48M 5s
## 3450K .......... .......... .......... .......... .......... 27% 2.21M 5s
## 3500K .......... .......... .......... .......... .......... 27% 2.16M 5s
## 3550K .......... .......... .......... .......... .......... 28% 1.65M 4s
## 3600K .......... .......... .......... .......... .......... 28% 1.81M 4s
## 3650K .......... .......... .......... .......... .......... 28% 2.68M 4s
## 3700K .......... .......... .......... .......... .......... 29% 2.13M 4s
## 3750K .......... .......... .......... .......... .......... 29% 2.16M 4s
## 3800K .......... .......... .......... .......... .......... 30% 1.95M 4s
## 3850K .......... .......... .......... .......... .......... 30% 2.59M 4s
## 3900K .......... .......... .......... .......... .......... 30% 1.88M 4s
## 3950K .......... .......... .......... .......... .......... 31% 2.52M 4s
## 4000K .......... .......... .......... .......... .......... 31% 1.49M 4s
## 4050K .......... .......... .......... .......... .......... 32% 2.33M 4s
## 4100K .......... .......... .......... .......... .......... 32% 2.35M 4s
## 4150K .......... .......... .......... .......... .......... 32% 2.15M 4s
## 4200K .......... .......... .......... .......... .......... 33% 1.33M 4s
## 4250K .......... .......... .......... .......... .......... 33% 1.05M 4s
## 4300K .......... .......... .......... .......... .......... 33% 4.04M 4s
## 4350K .......... .......... .......... .......... .......... 34% 7.91M 4s
## 4400K .......... .......... .......... .......... .......... 34% 1.06M 4s
## 4450K .......... .......... .......... .......... .......... 35% 3.01M 4s
## 4500K .......... .......... .......... .......... .......... 35% 3.17M 4s
## 4550K .......... .......... .......... .......... .......... 35% 3.19M 4s
## 4600K .......... .......... .......... .......... .......... 36% 3.30M 4s
## 4650K .......... .......... .......... .......... .......... 36% 1.74M 4s
## 4700K .......... .......... .......... .......... .......... 37% 2.68M 4s
## 4750K .......... .......... .......... .......... .......... 37% 2.34M 4s
## 4800K .......... .......... .......... .......... .......... 37% 1.62M 4s
## 4850K .......... .......... .......... .......... .......... 38% 1.73M 4s
## 4900K .......... .......... .......... .......... .......... 38% 3.11M 4s
## 4950K .......... .......... .......... .......... .......... 39% 2.17M 4s
## 5000K .......... .......... .......... .......... .......... 39% 2.09M 4s
## 5050K .......... .......... .......... .......... .......... 39% 1.72M 4s
## 5100K .......... .......... .......... .......... .......... 40% 3.24M 4s
## 5150K .......... .......... .......... .......... .......... 40% 1.97M 4s
## 5200K .......... .......... .......... .......... .......... 40% 1.45M 4s
## 5250K .......... .......... .......... .......... .......... 41% 2.41M 4s
## 5300K .......... .......... .......... .......... .......... 41% 1.95M 4s
## 5350K .......... .......... .......... .......... .......... 42% 2.29M 4s
## 5400K .......... .......... .......... .......... .......... 42% 2.33M 4s
## 5450K .......... .......... .......... .......... .......... 42% 2.40M 4s
## 5500K .......... .......... .......... .......... .......... 43% 2.00M 3s
## 5550K .......... .......... .......... .......... .......... 43% 1.96M 3s
## 5600K .......... .......... .......... .......... .......... 44% 1.85M 3s
## 5650K .......... .......... .......... .......... .......... 44% 2.11M 3s
## 5700K .......... .......... .......... .......... .......... 44% 1.92M 3s
## 5750K .......... .......... .......... .......... .......... 45% 2.65M 3s
## 5800K .......... .......... .......... .......... .......... 45% 1.86M 3s
## 5850K .......... .......... .......... .......... .......... 46% 2.61M 3s
## 5900K .......... .......... .......... .......... .......... 46% 2.22M 3s
## 5950K .......... .......... .......... .......... .......... 46% 1.77M 3s
## 6000K .......... .......... .......... .......... .......... 47% 1.99M 3s
## 6050K .......... .......... .......... .......... .......... 47% 2.16M 3s
## 6100K .......... .......... .......... .......... .......... 48% 2.12M 3s
## 6150K .......... .......... .......... .......... .......... 48% 1.93M 3s
## 6200K .......... .......... .......... .......... .......... 48% 2.20M 3s
## 6250K .......... .......... .......... .......... .......... 49% 2.19M 3s
## 6300K .......... .......... .......... .......... .......... 49% 2.73M 3s
## 6350K .......... .......... .......... .......... .......... 49% 2.02M 3s
## 6400K .......... .......... .......... .......... .......... 50% 1.47M 3s
## 6450K .......... .......... .......... .......... .......... 50% 2.14M 3s
## 6500K .......... .......... .......... .......... .......... 51% 2.53M 3s
## 6550K .......... .......... .......... .......... .......... 51% 2.41M 3s
## 6600K .......... .......... .......... .......... .......... 51% 1.70M 3s
## 6650K .......... .......... .......... .......... .......... 52% 2.96M 3s
## 6700K .......... .......... .......... .......... .......... 52% 2.10M 3s
## 6750K .......... .......... .......... .......... .......... 53% 1.93M 3s
## 6800K .......... .......... .......... .......... .......... 53% 1.52M 3s
## 6850K .......... .......... .......... .......... .......... 53% 2.46M 3s
## 6900K .......... .......... .......... .......... .......... 54% 2.45M 3s
## 6950K .......... .......... .......... .......... .......... 54% 2.25M 3s
## 7000K .......... .......... .......... .......... .......... 55% 2.14M 3s
## 7050K .......... .......... .......... .......... .......... 55% 2.15M 3s
## 7100K .......... .......... .......... .......... .......... 55% 2.13M 3s
## 7150K .......... .......... .......... .......... .......... 56% 2.35M 3s
## 7200K .......... .......... .......... .......... .......... 56% 1.48M 3s
## 7250K .......... .......... .......... .......... .......... 57% 1.71M 3s
## 7300K .......... .......... .......... .......... .......... 57% 2.87M 3s
## 7350K .......... .......... .......... .......... .......... 57% 2.55M 3s
## 7400K .......... .......... .......... .......... .......... 58% 2.24M 3s
## 7450K .......... .......... .......... .......... .......... 58% 2.11M 3s
## 7500K .......... .......... .......... .......... .......... 58% 1.80M 3s
## 7550K .......... .......... .......... .......... .......... 59% 2.26M 2s
## 7600K .......... .......... .......... .......... .......... 59% 1.89M 2s
## 7650K .......... .......... .......... .......... .......... 60% 2.02M 2s
## 7700K .......... .......... .......... .......... .......... 60% 1.79M 2s
## 7750K .......... .......... .......... .......... .......... 60% 3.13M 2s
## 7800K .......... .......... .......... .......... .......... 61% 2.01M 2s
## 7850K .......... .......... .......... .......... .......... 61% 2.01M 2s
## 7900K .......... .......... .......... .......... .......... 62% 2.05M 2s
## 7950K .......... .......... .......... .......... .......... 62% 2.47M 2s
## 8000K .......... .......... .......... .......... .......... 62% 1.71M 2s
## 8050K .......... .......... .......... .......... .......... 63% 2.06M 2s
## 8100K .......... .......... .......... .......... .......... 63% 2.10M 2s
## 8150K .......... .......... .......... .......... .......... 64% 2.21M 2s
## 8200K .......... .......... .......... .......... .......... 64% 2.30M 2s
## 8250K .......... .......... .......... .......... .......... 64% 2.34M 2s
## 8300K .......... .......... .......... .......... .......... 65% 2.21M 2s
## 8350K .......... .......... .......... .......... .......... 65% 1.94M 2s
## 8400K .......... .......... .......... .......... .......... 65% 1.33M 2s
## 8450K .......... .......... .......... .......... .......... 66% 2.09M 2s
## 8500K .......... .......... .......... .......... .......... 66% 2.12M 2s
## 8550K .......... .......... .......... .......... .......... 67% 1.71M 2s
## 8600K .......... .......... .......... .......... .......... 67% 2.58M 2s
## 8650K .......... .......... .......... .......... .......... 67% 2.78M 2s
## 8700K .......... .......... .......... .......... .......... 68% 2.87M 2s
## 8750K .......... .......... .......... .......... .......... 68% 2.01M 2s
## 8800K .......... .......... .......... .......... .......... 69% 1.25M 2s
## 8850K .......... .......... .......... .......... .......... 69% 4.40M 2s
## 8900K .......... .......... .......... .......... .......... 69% 2.32M 2s
## 8950K .......... .......... .......... .......... .......... 70% 2.15M 2s
## 9000K .......... .......... .......... .......... .......... 70% 1.50M 2s
## 9050K .......... .......... .......... .......... .......... 71% 3.48M 2s
## 9100K .......... .......... .......... .......... .......... 71% 2.08M 2s
## 9150K .......... .......... .......... .......... .......... 71% 2.48M 2s
## 9200K .......... .......... .......... .......... .......... 72% 1.42M 2s
## 9250K .......... .......... .......... .......... .......... 72% 2.47M 2s
## 9300K .......... .......... .......... .......... .......... 73% 1.87M 2s
## 9350K .......... .......... .......... .......... .......... 73% 3.18M 2s
## 9400K .......... .......... .......... .......... .......... 73% 1.94M 2s
## 9450K .......... .......... .......... .......... .......... 74% 1.74M 2s
## 9500K .......... .......... .......... .......... .......... 74% 2.76M 2s
## 9550K .......... .......... .......... .......... .......... 74% 2.68M 2s
## 9600K .......... .......... .......... .......... .......... 75% 1.62M 1s
## 9650K .......... .......... .......... .......... .......... 75% 1.99M 1s
## 9700K .......... .......... .......... .......... .......... 76% 2.16M 1s
## 9750K .......... .......... .......... .......... .......... 76% 2.37M 1s
## 9800K .......... .......... .......... .......... .......... 76% 2.24M 1s
## 9850K .......... .......... .......... .......... .......... 77% 2.15M 1s
## 9900K .......... .......... .......... .......... .......... 77% 1.76M 1s
## 9950K .......... .......... .......... .......... .......... 78% 2.83M 1s
## 10000K .......... .......... .......... .......... .......... 78% 1.42M 1s
## 10050K .......... .......... .......... .......... .......... 78% 2.32M 1s
## 10100K .......... .......... .......... .......... .......... 79% 1.67M 1s
## 10150K .......... .......... .......... .......... .......... 79% 2.54M 1s
## 10200K .......... .......... .......... .......... .......... 80% 2.53M 1s
## 10250K .......... .......... .......... .......... .......... 80% 2.21M 1s
## 10300K .......... .......... .......... .......... .......... 80% 2.20M 1s
## 10350K .......... .......... .......... .......... .......... 81% 1.83M 1s
## 10400K .......... .......... .......... .......... .......... 81% 1.60M 1s
## 10450K .......... .......... .......... .......... .......... 81% 2.14M 1s
## 10500K .......... .......... .......... .......... .......... 82% 2.51M 1s
## 10550K .......... .......... .......... .......... .......... 82% 1.43M 1s
## 10600K .......... .......... .......... .......... .......... 83% 2.88M 1s
## 10650K .......... .......... .......... .......... .......... 83% 3.53M 1s
## 10700K .......... .......... .......... .......... .......... 83% 2.01M 1s
## 10750K .......... .......... .......... .......... .......... 84% 1.49M 1s
## 10800K .......... .......... .......... .......... .......... 84% 2.31M 1s
## 10850K .......... .......... .......... .......... .......... 85% 2.39M 1s
## 10900K .......... .......... .......... .......... .......... 85% 2.13M 1s
## 10950K .......... .......... .......... .......... .......... 85% 1.20M 1s
## 11000K .......... .......... .......... .......... .......... 86% 2.56M 1s
## 11050K .......... .......... .......... .......... .......... 86% 2.83M 1s
## 11100K .......... .......... .......... .......... .......... 87% 2.01M 1s
## 11150K .......... .......... .......... .......... .......... 87% 2.61M 1s
## 11200K .......... .......... .......... .......... .......... 87% 981K 1s
## 11250K .......... .......... .......... .......... .......... 88% 3.09M 1s
## 11300K .......... .......... .......... .......... .......... 88% 4.16M 1s
## 11350K .......... .......... .......... .......... .......... 89% 4.22M 1s
## 11400K .......... .......... .......... .......... .......... 89% 2.09M 1s
## 11450K .......... .......... .......... .......... .......... 89% 2.49M 1s
## 11500K .......... .......... .......... .......... .......... 90% 2.13M 1s
## 11550K .......... .......... .......... .......... .......... 90% 2.43M 1s
## 11600K .......... .......... .......... .......... .......... 90% 1.56M 1s
## 11650K .......... .......... .......... .......... .......... 91% 2.34M 1s
## 11700K .......... .......... .......... .......... .......... 91% 1.89M 1s
## 11750K .......... .......... .......... .......... .......... 92% 2.80M 0s
## 11800K .......... .......... .......... .......... .......... 92% 2.18M 0s
## 11850K .......... .......... .......... .......... .......... 92% 1.95M 0s
## 11900K .......... .......... .......... .......... .......... 93% 2.19M 0s
## 11950K .......... .......... .......... .......... .......... 93% 2.44M 0s
## 12000K .......... .......... .......... .......... .......... 94% 1.57M 0s
## 12050K .......... .......... .......... .......... .......... 94% 2.41M 0s
## 12100K .......... .......... .......... .......... .......... 94% 2.07M 0s
## 12150K .......... .......... .......... .......... .......... 95% 2.24M 0s
## 12200K .......... .......... .......... .......... .......... 95% 2.22M 0s
## 12250K .......... .......... .......... .......... .......... 96% 2.15M 0s
## 12300K .......... .......... .......... .......... .......... 96% 1.60M 0s
## 12350K .......... .......... .......... .......... .......... 96% 3.41M 0s
## 12400K .......... .......... .......... .......... .......... 97% 1.67M 0s
## 12450K .......... .......... .......... .......... .......... 97% 2.09M 0s
## 12500K .......... .......... .......... .......... .......... 97% 1.43M 0s
## 12550K .......... .......... .......... .......... .......... 98% 3.87M 0s
## 12600K .......... .......... .......... .......... .......... 98% 2.45M 0s
## 12650K .......... .......... .......... .......... .......... 99% 1.78M 0s
## 12700K .......... .......... .......... .......... .......... 99% 1.65M 0s
## 12750K .......... .......... .......... .......... .......... 99% 1.83M 0s
## 12800K ...... 100% 1.07M=6.1s
##
## 2016-03-02 06:06:30 (2.06 MB/s) - 'seqs_demultiplexed.fna.tar.gz' saved [13113749/13113749]
##
## --2016-03-02 06:06:30-- https://raw.githubusercontent.com/enriquepaz/2016_Paz_et_al_Dairy_Breeds/master/mappingfile.txt
## Resolving raw.githubusercontent.com... 23.235.44.133
## Connecting to raw.githubusercontent.com|23.235.44.133|:443... connected.
## HTTP request sent, awaiting response... 200 OK
## Length: 1307 (1.3K) [text/plain]
## Saving to: 'mappingfile.txt'
##
## 0K . 100% 73.3M=0s
##
## 2016-03-02 06:06:31 (73.3 MB/s) - 'mappingfile.txt' saved [1307/1307]
##
## x seqs_demultiplexed.fna
Trim the sequences to a fixed length (130 basepairs for this study) using 2 custom perl scripts to improve OTU selection in UPARSE downstream.
wget https://raw.githubusercontent.com/enriquepaz/2016_Paz_et_al_Dairy_Breeds/master/Scripts/min_max_length.pl
chmod 775 min_max_length.pl
wget https://raw.githubusercontent.com/enriquepaz/2016_Paz_et_al_Dairy_Breeds/master/Scripts/second_min_max_length.pl
chmod 775 second_min_max_length.pl
./min_max_length.pl -min=131 -max=131 -fasta=seqs_revprimer_truncated/seqs_demultiplexed_rev_primer_truncated.fna
./second_min_max_length.pl -min=130 -max=130 -fasta=seqs_trimmed.fasta
## --2016-03-02 06:21:31-- https://raw.githubusercontent.com/enriquepaz/2016_Paz_et_al_Dairy_Breeds/master/Scripts/min_max_length.pl
## Resolving raw.githubusercontent.com... 23.235.40.133
## Connecting to raw.githubusercontent.com|23.235.40.133|:443... connected.
## HTTP request sent, awaiting response... 200 OK
## Length: 1832 (1.8K) [text/plain]
## Saving to: 'min_max_length.pl'
##
## 0K . 100% 125M=0s
##
## 2016-03-02 06:21:32 (125 MB/s) - 'min_max_length.pl' saved [1832/1832]
##
## --2016-03-02 06:21:32-- https://raw.githubusercontent.com/enriquepaz/2016_Paz_et_al_Dairy_Breeds/master/Scripts/second_min_max_length.pl
## Resolving raw.githubusercontent.com... 23.235.40.133
## Connecting to raw.githubusercontent.com|23.235.40.133|:443... connected.
## HTTP request sent, awaiting response... 200 OK
## Length: 1906 (1.9K) [text/plain]
## Saving to: 'second_min_max_length.pl'
##
## 0K . 100% 165M=0s
##
## 2016-03-02 06:21:32 (165 MB/s) - 'second_min_max_length.pl' saved [1906/1906]
##
##
## Input read count: 458998
## Output read count: 397229
##
##
## Input read count: 397230
## Output read count: 397230
Reverse complement the sequences in mothur.
mothur "#reverse.seqs(fasta=seqs_finaltrimmed.fasta)"
## [H[2J
##
##
##
##
##
## mothur v.1.36.1
## Last updated: 7/27/2015
##
## by
## Patrick D. Schloss
##
## Department of Microbiology & Immunology
## University of Michigan
## pschloss@umich.edu
## http://www.mothur.org
##
## When using, please cite:
## Schloss, P.D., et al., Introducing mothur: Open-source, platform-independent, community-supported software for describing and comparing microbial communities. Appl Environ Microbiol, 2009. 75(23):7537-41.
##
## Distributed under the GNU General Public License
##
## Type 'help()' for information on the commands that are available
##
## Type 'quit()' to exit program
##
##
##
## mothur > reverse.seqs(fasta=seqs_finaltrimmed.fasta)
##
## Output File Names:
## seqs_finaltrimmed.rc.fasta
##
## mothur > quit()
Use a custom perl script to convert the fasta file from QIIME format to UPARSE format to generate the OTU table.
wget https://raw.githubusercontent.com/enriquepaz/2016_Paz_et_al_Dairy_Breeds/master/Scripts/qiime_to_usearch.pl
chmod 775 qiime_to_usearch.pl
./qiime_to_usearch.pl -fasta=seqs_finaltrimmed.rc.fasta -prefix=cow
mv format.fasta dairybreeds.format.fasta
Run the sequences through the UPARSE pipeline to pick OTUs.
svn export https://github.com/enriquepaz/2016_Paz_et_al_Dairy_Breeds/trunk/Scripts/usearch_python_scripts --non-interactive --trust-server-cert
chmod -R 775 usearch_python_scripts
chmod 775 usearch_python_scripts/uc2otutab.py
chmod 775 usearch_python_scripts/fasta_number.py
wget https://raw.githubusercontent.com/enriquepaz/2016_Paz_et_al_Dairy_Breeds/master/gold.fasta.gz
gzip -d gold.fasta.gz
chmod 775 gold.fasta
mkdir usearch_results
usearch -derep_fulllength dairybreeds.format.fasta -sizeout -fastaout usearch_results/dairybreeds.derep.fasta
usearch -sortbysize usearch_results/dairybreeds.derep.fasta -minsize 2 -fastaout usearch_results/dairybreeds.derep.sort.fasta
usearch -cluster_otus usearch_results/dairybreeds.derep.sort.fasta -otus usearch_results/dairybreeds.otus1.fasta
usearch -uchime_ref usearch_results/dairybreeds.otus1.fasta -db gold.fasta -strand plus -nonchimeras usearch_results/dairybreeds.otus1.nonchimera.fasta
python usearch_python_scripts/fasta_number.py usearch_results/dairybreeds.otus1.nonchimera.fasta > usearch_results/dairybreeds.otus2.fasta
usearch -usearch_global dairybreeds.format.fasta -db usearch_results/dairybreeds.otus2.fasta -strand plus -id 0.97 -uc usearch_results/dairybreeds.otu_map.uc
python usearch_python_scripts/uc2otutab.py usearch_results/dairybreeds.otu_map.uc > usearch_results/dairybreeds.otu_table.txt
cp usearch_results/dairybreeds.otu_table.txt ./
## A usearch_python_scripts
## A usearch_python_scripts/die.py
## A usearch_python_scripts/die.pyc
## A usearch_python_scripts/faqual2fastq.py
## A usearch_python_scripts/faqual2fastq2.py
## A usearch_python_scripts/fasta.py
## A usearch_python_scripts/fasta.pyc
## A usearch_python_scripts/fasta_number.py
## A usearch_python_scripts/fastq.py
## A usearch_python_scripts/fastq_strip_barcode_relabel.py
## A usearch_python_scripts/fastq_strip_barcode_relabel2.py
## A usearch_python_scripts/primer.py
## A usearch_python_scripts/progress.py
## A usearch_python_scripts/progress.pyc
## A usearch_python_scripts/uc.py
## A usearch_python_scripts/uc.pyc
## A usearch_python_scripts/uc2otutab.py
## Exported revision 23.
## --2016-03-02 06:21:45-- https://raw.githubusercontent.com/enriquepaz/2016_Paz_et_al_Dairy_Breeds/master/gold.fasta.gz
## Resolving raw.githubusercontent.com... 23.235.40.133
## Connecting to raw.githubusercontent.com|23.235.40.133|:443... connected.
## HTTP request sent, awaiting response... 200 OK
## Length: 3293704 (3.1M) [application/octet-stream]
## Saving to: 'gold.fasta.gz'
##
## 0K .......... .......... .......... .......... .......... 1% 561K 6s
## 50K .......... .......... .......... .......... .......... 3% 560K 6s
## 100K .......... .......... .......... .......... .......... 4% 677K 5s
## 150K .......... .......... .......... .......... .......... 6% 767K 5s
## 200K .......... .......... .......... .......... .......... 7% 902K 4s
## 250K .......... .......... .......... .......... .......... 9% 859K 4s
## 300K .......... .......... .......... .......... .......... 10% 845K 4s
## 350K .......... .......... .......... .......... .......... 12% 525K 4s
## 400K .......... .......... .......... .......... .......... 13% 463K 4s
## 450K .......... .......... .......... .......... .......... 15% 2.57M 4s
## 500K .......... .......... .......... .......... .......... 17% 642K 4s
## 550K .......... .......... .......... .......... .......... 18% 897K 4s
## 600K .......... .......... .......... .......... .......... 20% 1.20M 4s
## 650K .......... .......... .......... .......... .......... 21% 1.41M 3s
## 700K .......... .......... .......... .......... .......... 23% 1021K 3s
## 750K .......... .......... .......... .......... .......... 24% 646K 3s
## 800K .......... .......... .......... .......... .......... 26% 2.18M 3s
## 850K .......... .......... .......... .......... .......... 27% 590K 3s
## 900K .......... .......... .......... .......... .......... 29% 3.22M 3s
## 950K .......... .......... .......... .......... .......... 31% 782K 3s
## 1000K .......... .......... .......... .......... .......... 32% 981K 3s
## 1050K .......... .......... .......... .......... .......... 34% 2.28M 3s
## 1100K .......... .......... .......... .......... .......... 35% 971K 2s
## 1150K .......... .......... .......... .......... .......... 37% 781K 2s
## 1200K .......... .......... .......... .......... .......... 38% 2.28M 2s
## 1250K .......... .......... .......... .......... .......... 40% 1.25M 2s
## 1300K .......... .......... .......... .......... .......... 41% 1.26M 2s
## 1350K .......... .......... .......... .......... .......... 43% 1.56M 2s
## 1400K .......... .......... .......... .......... .......... 45% 1.18M 2s
## 1450K .......... .......... .......... .......... .......... 46% 1.67M 2s
## 1500K .......... .......... .......... .......... .......... 48% 1.40M 2s
## 1550K .......... .......... .......... .......... .......... 49% 1.29M 2s
## 1600K .......... .......... .......... .......... .......... 51% 2.04M 2s
## 1650K .......... .......... .......... .......... .......... 52% 1.38M 2s
## 1700K .......... .......... .......... .......... .......... 54% 1.39M 1s
## 1750K .......... .......... .......... .......... .......... 55% 1.39M 1s
## 1800K .......... .......... .......... .......... .......... 57% 2.50M 1s
## 1850K .......... .......... .......... .......... .......... 59% 1.17M 1s
## 1900K .......... .......... .......... .......... .......... 60% 2.00M 1s
## 1950K .......... .......... .......... .......... .......... 62% 2.24M 1s
## 2000K .......... .......... .......... .......... .......... 63% 1.50M 1s
## 2050K .......... .......... .......... .......... .......... 65% 1.90M 1s
## 2100K .......... .......... .......... .......... .......... 66% 1.19M 1s
## 2150K .......... .......... .......... .......... .......... 68% 1.53M 1s
## 2200K .......... .......... .......... .......... .......... 69% 3.53M 1s
## 2250K .......... .......... .......... .......... .......... 71% 1.55M 1s
## 2300K .......... .......... .......... .......... .......... 73% 1.48M 1s
## 2350K .......... .......... .......... .......... .......... 74% 2.42M 1s
## 2400K .......... .......... .......... .......... .......... 76% 2.10M 1s
## 2450K .......... .......... .......... .......... .......... 77% 837K 1s
## 2500K .......... .......... .......... .......... .......... 79% 1.74M 1s
## 2550K .......... .......... .......... .......... .......... 80% 2.10M 1s
## 2600K .......... .......... .......... .......... .......... 82% 1.76M 0s
## 2650K .......... .......... .......... .......... .......... 83% 1.13M 0s
## 2700K .......... .......... .......... .......... .......... 85% 2.23M 0s
## 2750K .......... .......... .......... .......... .......... 87% 1.10M 0s
## 2800K .......... .......... .......... .......... .......... 88% 1.57M 0s
## 2850K .......... .......... .......... .......... .......... 90% 1.60M 0s
## 2900K .......... .......... .......... .......... .......... 91% 1.32M 0s
## 2950K .......... .......... .......... .......... .......... 93% 1.99M 0s
## 3000K .......... .......... .......... .......... .......... 94% 1.17M 0s
## 3050K .......... .......... .......... .......... .......... 96% 1.09M 0s
## 3100K .......... .......... .......... .......... .......... 97% 1.32M 0s
## 3150K .......... .......... .......... .......... .......... 99% 1.57M 0s
## 3200K .......... ...... 100% 2.24M=2.7s
##
## 2016-03-02 06:21:49 (1.16 MB/s) - 'gold.fasta.gz' saved [3293704/3293704]
##
## usearch v8.1.1861_i86osx32, 4.0Gb RAM (4.3Gb total), 4 cores
## (C) Copyright 2013-15 Robert C. Edgar, all rights reserved.
## http://drive5.com/usearch
##
## Licensed to: henry.paz@huskers.unl.edu
##
## 00:00 1.3Mb 0.1% Reading dairybreeds.format.fasta
00:01 108Mb 91.7% Reading dairybreeds.format.fasta
00:01 114Mb 100.0% Reading dairybreeds.format.fasta
## 00:01 104Mb 397231 seqs, 108008 uniques, 82894 singletons (76.7%)
## 00:01 104Mb Min size 1, median 1, max 4275, avg 3.68
## 00:01 97Mb 0.0% Writing usearch_results/dairybreeds.derep.fasta
00:02 97Mb 18.9% Writing usearch_results/dairybreeds.derep.fasta
00:03 97Mb 88.4% Writing usearch_results/dairybreeds.derep.fasta
00:03 97Mb 100.0% Writing usearch_results/dairybreeds.derep.fasta
## usearch v8.1.1861_i86osx32, 4.0Gb RAM (4.3Gb total), 4 cores
## (C) Copyright 2013-15 Robert C. Edgar, all rights reserved.
## http://drive5.com/usearch
##
## Licensed to: henry.paz@huskers.unl.edu
##
## 00:00 1.3Mb 0.1% Reading usearch_results/dairybreeds.derep.fasta
00:00 40Mb 100.0% Reading usearch_results/dairybreeds.derep.fasta
## 00:00 22Mb Getting sizes
## 00:00 22Mb Sorting 25114 sequences
## 00:00 22Mb 0.0% Writing output
00:01 22Mb 32.3% Writing output
00:01 22Mb 100.0% Writing output
## usearch v8.1.1861_i86osx32, 4.0Gb RAM (4.3Gb total), 4 cores
## (C) Copyright 2013-15 Robert C. Edgar, all rights reserved.
## http://drive5.com/usearch
##
## Licensed to: henry.paz@huskers.unl.edu
##
## 00:00 2.4Mb 0.1% 0 OTUs, 0 chimeras (0.0%)
00:01 9.9Mb 13.7% 982 OTUs, 170 chimeras (5.0%)
00:02 11Mb 34.0% 1663 OTUs, 682 chimeras (8.0%)
00:03 11Mb 51.4% 2130 OTUs, 1295 chimeras (10.0%)
00:04 12Mb 66.3% 2553 OTUs, 1961 chimeras (11.8%)
00:05 12Mb 80.3% 2899 OTUs, 2627 chimeras (13.0%)
00:06 12Mb 93.0% 3206 OTUs, 3231 chimeras (13.8%)
00:06 13Mb 100.0% 3371 OTUs, 3505 chimeras (14.0%)
## usearch v8.1.1861_i86osx32, 4.0Gb RAM (4.3Gb total), 4 cores
## (C) Copyright 2013-15 Robert C. Edgar, all rights reserved.
## http://drive5.com/usearch
##
## Licensed to: henry.paz@huskers.unl.edu
##
## 00:00 1.3Mb 0.1% Reading usearch_results/dairybreeds.otus1.fasta
00:00 2.5Mb 100.0% Reading usearch_results/dairybreeds.otus1.fasta
## 00:00 2.0Mb 0.1% Reading gold.fasta
00:00 35Mb 100.0% Reading gold.fasta
## 00:00 19Mb 0.1% Masking
00:01 19Mb 53.6% Masking
00:01 19Mb 100.0% Masking
## 00:01 20Mb 0.1% Word stats
00:01 20Mb 100.0% Word stats
## 00:01 20Mb 100.0% Alloc rows
## 00:01 20Mb 0.1% Build index
00:02 66Mb 19.8% Build index
00:02 76Mb 100.0% Build index
## 00:02 76Mb 0.0% Found 0/1 chimeras (0.0%), 0 not classified (0.0%)
00:03 82Mb 17.7% Found 12/597 chimeras (2.0%), 392 not classified (65.7%)
00:04 94Mb 59.1% Found 66/1993 chimeras (3.3%), 1532 not classified (76.9%)
00:04 104Mb 100.0% Found 110/3371 chimeras (3.3%), 2502 not classified (74.2%)
## 00:04 104Mb 0.0% Writing 3261 non-chimeras and unclassifieds
00:04 104Mb 100.0% Writing 3261 non-chimeras and unclassifieds
## usearch v8.1.1861_i86osx32, 4.0Gb RAM (4.3Gb total), 4 cores
## (C) Copyright 2013-15 Robert C. Edgar, all rights reserved.
## http://drive5.com/usearch
##
## Licensed to: henry.paz@huskers.unl.edu
##
## 00:00 1.3Mb 0.1% Reading usearch_results/dairybreeds.otus2.fasta
00:00 2.4Mb 100.0% Reading usearch_results/dairybreeds.otus2.fasta
## 00:00 1.9Mb 0.1% Masking
00:00 1.9Mb 100.0% Masking
## 00:00 2.7Mb 0.1% Word stats
00:00 2.8Mb 100.0% Word stats
## 00:00 2.8Mb 100.0% Alloc rows
## 00:00 2.8Mb 0.1% Build index
00:00 4.4Mb 100.0% Build index
## 00:00 4.4Mb 0.1% Searching, 0.0% matched
00:01 39Mb 7.2% Searching, 86.4% matched
00:02 39Mb 15.1% Searching, 86.5% matched
00:03 39Mb 24.0% Searching, 86.5% matched
00:04 39Mb 34.2% Searching, 86.6% matched
00:05 39Mb 44.5% Searching, 86.8% matched
00:06 39Mb 54.8% Searching, 86.9% matched
00:07 39Mb 65.2% Searching, 86.9% matched
00:08 39Mb 75.5% Searching, 86.9% matched
00:09 39Mb 86.1% Searching, 87.0% matched
00:10 39Mb 96.8% Searching, 87.1% matched
00:10 39Mb 100.0% Searching, 87.1% matched
## usearch_results/dairybreeds.otu_map.uc 0.0%
usearch_results/dairybreeds.otu_map.uc 12.4%
usearch_results/dairybreeds.otu_map.uc 24.2%
usearch_results/dairybreeds.otu_map.uc 37.0%
usearch_results/dairybreeds.otu_map.uc 49.7%
usearch_results/dairybreeds.otu_map.uc 62.5%
usearch_results/dairybreeds.otu_map.uc 75.0%
usearch_results/dairybreeds.otu_map.uc 87.8%
usearch_results/dairybreeds.otu_map.uc 100.0%
Assign taxonomy to the OTU representative sequences:
wget https://raw.githubusercontent.com/enriquepaz/2016_Paz_et_al_Dairy_Breeds/master/gg_13_5_otus.tar.gz
tar -zxvf gg_13_5_otus.tar.gz
assign_taxonomy.py -i usearch_results/dairybreeds.otus2.fasta -t gg_13_5_otus/97_otu_taxonomy.txt -r gg_13_5_otus/97_otus.fasta -o assigned_gg_taxa
## --2016-03-02 06:22:22-- https://raw.githubusercontent.com/enriquepaz/2016_Paz_et_al_Dairy_Breeds/master/gg_13_5_otus.tar.gz
## Resolving raw.githubusercontent.com... 23.235.40.133
## Connecting to raw.githubusercontent.com|23.235.40.133|:443... connected.
## HTTP request sent, awaiting response... 200 OK
## Length: 30969823 (30M) [application/octet-stream]
## Saving to: 'gg_13_5_otus.tar.gz'
##
## 0K .......... .......... .......... .......... .......... 0% 568K 53s
## 50K .......... .......... .......... .......... .......... 0% 599K 52s
## 100K .......... .......... .......... .......... .......... 0% 682K 49s
## 150K .......... .......... .......... .......... .......... 0% 715K 47s
## 200K .......... .......... .......... .......... .......... 0% 810K 45s
## 250K .......... .......... .......... .......... .......... 0% 789K 44s
## 300K .......... .......... .......... .......... .......... 1% 864K 43s
## 350K .......... .......... .......... .......... .......... 1% 718K 42s
## 400K .......... .......... .......... .......... .......... 1% 928K 41s
## 450K .......... .......... .......... .......... .......... 1% 710K 41s
## 500K .......... .......... .......... .......... .......... 1% 449K 43s
## 550K .......... .......... .......... .......... .......... 1% 776K 43s
## 600K .......... .......... .......... .......... .......... 2% 891K 42s
## 650K .......... .......... .......... .......... .......... 2% 1.45M 40s
## 700K .......... .......... .......... .......... .......... 2% 1.08M 39s
## 750K .......... .......... .......... .......... .......... 2% 806K 39s
## 800K .......... .......... .......... .......... .......... 2% 854K 39s
## 850K .......... .......... .......... .......... .......... 2% 1.54M 38s
## 900K .......... .......... .......... .......... .......... 3% 718K 38s
## 950K .......... .......... .......... .......... .......... 3% 1.79M 37s
## 1000K .......... .......... .......... .......... .......... 3% 1014K 36s
## 1050K .......... .......... .......... .......... .......... 3% 1.01M 36s
## 1100K .......... .......... .......... .......... .......... 3% 2.20M 35s
## 1150K .......... .......... .......... .......... .......... 3% 935K 34s
## 1200K .......... .......... .......... .......... .......... 4% 987K 34s
## 1250K .......... .......... .......... .......... .......... 4% 1.50M 34s
## 1300K .......... .......... .......... .......... .......... 4% 984K 33s
## 1350K .......... .......... .......... .......... .......... 4% 2.51M 33s
## 1400K .......... .......... .......... .......... .......... 4% 1.06M 32s
## 1450K .......... .......... .......... .......... .......... 4% 2.17M 32s
## 1500K .......... .......... .......... .......... .......... 5% 1.25M 31s
## 1550K .......... .......... .......... .......... .......... 5% 1.32M 31s
## 1600K .......... .......... .......... .......... .......... 5% 1.45M 30s
## 1650K .......... .......... .......... .......... .......... 5% 1.52M 30s
## 1700K .......... .......... .......... .......... .......... 5% 1.77M 30s
## 1750K .......... .......... .......... .......... .......... 5% 1.42M 29s
## 1800K .......... .......... .......... .......... .......... 6% 1.54M 29s
## 1850K .......... .......... .......... .......... .......... 6% 2.19M 28s
## 1900K .......... .......... .......... .......... .......... 6% 1.90M 28s
## 1950K .......... .......... .......... .......... .......... 6% 990K 28s
## 2000K .......... .......... .......... .......... .......... 6% 747K 28s
## 2050K .......... .......... .......... .......... .......... 6% 1.89M 28s
## 2100K .......... .......... .......... .......... .......... 7% 2.66M 27s
## 2150K .......... .......... .......... .......... .......... 7% 1.67M 27s
## 2200K .......... .......... .......... .......... .......... 7% 2.12M 27s
## 2250K .......... .......... .......... .......... .......... 7% 2.17M 26s
## 2300K .......... .......... .......... .......... .......... 7% 1.35M 26s
## 2350K .......... .......... .......... .......... .......... 7% 1.34M 26s
## 2400K .......... .......... .......... .......... .......... 8% 1.73M 26s
## 2450K .......... .......... .......... .......... .......... 8% 1.08M 26s
## 2500K .......... .......... .......... .......... .......... 8% 2.55M 25s
## 2550K .......... .......... .......... .......... .......... 8% 2.08M 25s
## 2600K .......... .......... .......... .......... .......... 8% 1.08M 25s
## 2650K .......... .......... .......... .......... .......... 8% 2.07M 25s
## 2700K .......... .......... .......... .......... .......... 9% 2.77M 24s
## 2750K .......... .......... .......... .......... .......... 9% 942K 24s
## 2800K .......... .......... .......... .......... .......... 9% 2.72M 24s
## 2850K .......... .......... .......... .......... .......... 9% 1.60M 24s
## 2900K .......... .......... .......... .......... .......... 9% 1.85M 24s
## 2950K .......... .......... .......... .......... .......... 9% 1.91M 24s
## 3000K .......... .......... .......... .......... .......... 10% 1.74M 23s
## 3050K .......... .......... .......... .......... .......... 10% 2.95M 23s
## 3100K .......... .......... .......... .......... .......... 10% 2.03M 23s
## 3150K .......... .......... .......... .......... .......... 10% 1.32M 23s
## 3200K .......... .......... .......... .......... .......... 10% 2.47M 23s
## 3250K .......... .......... .......... .......... .......... 10% 2.21M 22s
## 3300K .......... .......... .......... .......... .......... 11% 2.11M 22s
## 3350K .......... .......... .......... .......... .......... 11% 1.30M 22s
## 3400K .......... .......... .......... .......... .......... 11% 1.93M 22s
## 3450K .......... .......... .......... .......... .......... 11% 688K 22s
## 3500K .......... .......... .......... .......... .......... 11% 2.12M 22s
## 3550K .......... .......... .......... .......... .......... 11% 1.71M 22s
## 3600K .......... .......... .......... .......... .......... 12% 2.10M 22s
## 3650K .......... .......... .......... .......... .......... 12% 1.74M 22s
## 3700K .......... .......... .......... .......... .......... 12% 1.28M 21s
## 3750K .......... .......... .......... .......... .......... 12% 2.90M 21s
## 3800K .......... .......... .......... .......... .......... 12% 2.10M 21s
## 3850K .......... .......... .......... .......... .......... 12% 2.14M 21s
## 3900K .......... .......... .......... .......... .......... 13% 2.10M 21s
## 3950K .......... .......... .......... .......... .......... 13% 1.78M 21s
## 4000K .......... .......... .......... .......... .......... 13% 2.09M 21s
## 4050K .......... .......... .......... .......... .......... 13% 2.09M 20s
## 4100K .......... .......... .......... .......... .......... 13% 1.02M 20s
## 4150K .......... .......... .......... .......... .......... 13% 11.0M 20s
## 4200K .......... .......... .......... .......... .......... 14% 3.37M 20s
## 4250K .......... .......... .......... .......... .......... 14% 2.24M 20s
## 4300K .......... .......... .......... .......... .......... 14% 1.01M 20s
## 4350K .......... .......... .......... .......... .......... 14% 1.61M 20s
## 4400K .......... .......... .......... .......... .......... 14% 2.11M 20s
## 4450K .......... .......... .......... .......... .......... 14% 2.12M 20s
## 4500K .......... .......... .......... .......... .......... 15% 2.16M 19s
## 4550K .......... .......... .......... .......... .......... 15% 2.15M 19s
## 4600K .......... .......... .......... .......... .......... 15% 2.16M 19s
## 4650K .......... .......... .......... .......... .......... 15% 2.05M 19s
## 4700K .......... .......... .......... .......... .......... 15% 2.54M 19s
## 4750K .......... .......... .......... .......... .......... 15% 1.50M 19s
## 4800K .......... .......... .......... .......... .......... 16% 2.43M 19s
## 4850K .......... .......... .......... .......... .......... 16% 1.63M 19s
## 4900K .......... .......... .......... .......... .......... 16% 2.30M 19s
## 4950K .......... .......... .......... .......... .......... 16% 2.07M 18s
## 5000K .......... .......... .......... .......... .......... 16% 2.24M 18s
## 5050K .......... .......... .......... .......... .......... 16% 2.10M 18s
## 5100K .......... .......... .......... .......... .......... 17% 1.93M 18s
## 5150K .......... .......... .......... .......... .......... 17% 1.95M 18s
## 5200K .......... .......... .......... .......... .......... 17% 1.93M 18s
## 5250K .......... .......... .......... .......... .......... 17% 1012K 18s
## 5300K .......... .......... .......... .......... .......... 17% 6.39M 18s
## 5350K .......... .......... .......... .......... .......... 17% 3.31M 18s
## 5400K .......... .......... .......... .......... .......... 18% 2.55M 18s
## 5450K .......... .......... .......... .......... .......... 18% 2.58M 17s
## 5500K .......... .......... .......... .......... .......... 18% 2.08M 17s
## 5550K .......... .......... .......... .......... .......... 18% 1.54M 17s
## 5600K .......... .......... .......... .......... .......... 18% 2.12M 17s
## 5650K .......... .......... .......... .......... .......... 18% 2.49M 17s
## 5700K .......... .......... .......... .......... .......... 19% 2.21M 17s
## 5750K .......... .......... .......... .......... .......... 19% 1.61M 17s
## 5800K .......... .......... .......... .......... .......... 19% 2.86M 17s
## 5850K .......... .......... .......... .......... .......... 19% 1.93M 17s
## 5900K .......... .......... .......... .......... .......... 19% 2.30M 17s
## 5950K .......... .......... .......... .......... .......... 19% 1.69M 17s
## 6000K .......... .......... .......... .......... .......... 20% 1.79M 17s
## 6050K .......... .......... .......... .......... .......... 20% 2.90M 17s
## 6100K .......... .......... .......... .......... .......... 20% 2.11M 16s
## 6150K .......... .......... .......... .......... .......... 20% 2.43M 16s
## 6200K .......... .......... .......... .......... .......... 20% 2.06M 16s
## 6250K .......... .......... .......... .......... .......... 20% 2.48M 16s
## 6300K .......... .......... .......... .......... .......... 20% 1.89M 16s
## 6350K .......... .......... .......... .......... .......... 21% 1.68M 16s
## 6400K .......... .......... .......... .......... .......... 21% 2.10M 16s
## 6450K .......... .......... .......... .......... .......... 21% 2.39M 16s
## 6500K .......... .......... .......... .......... .......... 21% 2.14M 16s
## 6550K .......... .......... .......... .......... .......... 21% 1.53M 16s
## 6600K .......... .......... .......... .......... .......... 21% 3.08M 16s
## 6650K .......... .......... .......... .......... .......... 22% 2.58M 16s
## 6700K .......... .......... .......... .......... .......... 22% 1.64M 16s
## 6750K .......... .......... .......... .......... .......... 22% 2.21M 16s
## 6800K .......... .......... .......... .......... .......... 22% 1001K 16s
## 6850K .......... .......... .......... .......... .......... 22% 3.67M 15s
## 6900K .......... .......... .......... .......... .......... 22% 1.81M 15s
## 6950K .......... .......... .......... .......... .......... 23% 3.42M 15s
## 7000K .......... .......... .......... .......... .......... 23% 3.38M 15s
## 7050K .......... .......... .......... .......... .......... 23% 2.39M 15s
## 7100K .......... .......... .......... .......... .......... 23% 2.29M 15s
## 7150K .......... .......... .......... .......... .......... 23% 1.44M 15s
## 7200K .......... .......... .......... .......... .......... 23% 3.26M 15s
## 7250K .......... .......... .......... .......... .......... 24% 1.58M 15s
## 7300K .......... .......... .......... .......... .......... 24% 3.15M 15s
## 7350K .......... .......... .......... .......... .......... 24% 1.86M 15s
## 7400K .......... .......... .......... .......... .......... 24% 2.59M 15s
## 7450K .......... .......... .......... .......... .......... 24% 2.17M 15s
## 7500K .......... .......... .......... .......... .......... 24% 1.62M 15s
## 7550K .......... .......... .......... .......... .......... 25% 2.17M 15s
## 7600K .......... .......... .......... .......... .......... 25% 1.98M 14s
## 7650K .......... .......... .......... .......... .......... 25% 1.74M 14s
## 7700K .......... .......... .......... .......... .......... 25% 2.48M 14s
## 7750K .......... .......... .......... .......... .......... 25% 2.80M 14s
## 7800K .......... .......... .......... .......... .......... 25% 2.08M 14s
## 7850K .......... .......... .......... .......... .......... 26% 2.52M 14s
## 7900K .......... .......... .......... .......... .......... 26% 2.15M 14s
## 7950K .......... .......... .......... .......... .......... 26% 1.57M 14s
## 8000K .......... .......... .......... .......... .......... 26% 1.88M 14s
## 8050K .......... .......... .......... .......... .......... 26% 2.54M 14s
## 8100K .......... .......... .......... .......... .......... 26% 2.34M 14s
## 8150K .......... .......... .......... .......... .......... 27% 1.57M 14s
## 8200K .......... .......... .......... .......... .......... 27% 3.01M 14s
## 8250K .......... .......... .......... .......... .......... 27% 2.23M 14s
## 8300K .......... .......... .......... .......... .......... 27% 2.47M 14s
## 8350K .......... .......... .......... .......... .......... 27% 1.41M 14s
## 8400K .......... .......... .......... .......... .......... 27% 1.81M 14s
## 8450K .......... .......... .......... .......... .......... 28% 3.01M 14s
## 8500K .......... .......... .......... .......... .......... 28% 1.56M 14s
## 8550K .......... .......... .......... .......... .......... 28% 4.24M 13s
## 8600K .......... .......... .......... .......... .......... 28% 1.99M 13s
## 8650K .......... .......... .......... .......... .......... 28% 2.20M 13s
## 8700K .......... .......... .......... .......... .......... 28% 1.84M 13s
## 8750K .......... .......... .......... .......... .......... 29% 2.09M 13s
## 8800K .......... .......... .......... .......... .......... 29% 1.87M 13s
## 8850K .......... .......... .......... .......... .......... 29% 2.28M 13s
## 8900K .......... .......... .......... .......... .......... 29% 1.89M 13s
## 8950K .......... .......... .......... .......... .......... 29% 2.85M 13s
## 9000K .......... .......... .......... .......... .......... 29% 2.18M 13s
## 9050K .......... .......... .......... .......... .......... 30% 1.88M 13s
## 9100K .......... .......... .......... .......... .......... 30% 1.42M 13s
## 9150K .......... .......... .......... .......... .......... 30% 2.85M 13s
## 9200K .......... .......... .......... .......... .......... 30% 2.33M 13s
## 9250K .......... .......... .......... .......... .......... 30% 2.11M 13s
## 9300K .......... .......... .......... .......... .......... 30% 2.08M 13s
## 9350K .......... .......... .......... .......... .......... 31% 2.23M 13s
## 9400K .......... .......... .......... .......... .......... 31% 1.99M 13s
## 9450K .......... .......... .......... .......... .......... 31% 1.90M 13s
## 9500K .......... .......... .......... .......... .......... 31% 3.27M 13s
## 9550K .......... .......... .......... .......... .......... 31% 1.34M 13s
## 9600K .......... .......... .......... .......... .......... 31% 2.23M 12s
## 9650K .......... .......... .......... .......... .......... 32% 1.85M 12s
## 9700K .......... .......... .......... .......... .......... 32% 2.41M 12s
## 9750K .......... .......... .......... .......... .......... 32% 1.87M 12s
## 9800K .......... .......... .......... .......... .......... 32% 3.35M 12s
## 9850K .......... .......... .......... .......... .......... 32% 2.17M 12s
## 9900K .......... .......... .......... .......... .......... 32% 1.78M 12s
## 9950K .......... .......... .......... .......... .......... 33% 1.86M 12s
## 10000K .......... .......... .......... .......... .......... 33% 2.14M 12s
## 10050K .......... .......... .......... .......... .......... 33% 2.35M 12s
## 10100K .......... .......... .......... .......... .......... 33% 1.76M 12s
## 10150K .......... .......... .......... .......... .......... 33% 2.64M 12s
## 10200K .......... .......... .......... .......... .......... 33% 2.05M 12s
## 10250K .......... .......... .......... .......... .......... 34% 2.38M 12s
## 10300K .......... .......... .......... .......... .......... 34% 2.22M 12s
## 10350K .......... .......... .......... .......... .......... 34% 1.62M 12s
## 10400K .......... .......... .......... .......... .......... 34% 1.87M 12s
## 10450K .......... .......... .......... .......... .......... 34% 2.67M 12s
## 10500K .......... .......... .......... .......... .......... 34% 2.29M 12s
## 10550K .......... .......... .......... .......... .......... 35% 1.60M 12s
## 10600K .......... .......... .......... .......... .......... 35% 2.55M 12s
## 10650K .......... .......... .......... .......... .......... 35% 2.60M 12s
## 10700K .......... .......... .......... .......... .......... 35% 2.12M 12s
## 10750K .......... .......... .......... .......... .......... 35% 1.32M 11s
## 10800K .......... .......... .......... .......... .......... 35% 923K 12s
## 10850K .......... .......... .......... .......... .......... 36% 2.03M 11s
## 10900K .......... .......... .......... .......... .......... 36% 1.16M 11s
## 10950K .......... .......... .......... .......... .......... 36% 31.2M 11s
## 11000K .......... .......... .......... .......... .......... 36% 3.84M 11s
## 11050K .......... .......... .......... .......... .......... 36% 3.26M 11s
## 11100K .......... .......... .......... .......... .......... 36% 5.69M 11s
## 11150K .......... .......... .......... .......... .......... 37% 2.14M 11s
## 11200K .......... .......... .......... .......... .......... 37% 2.09M 11s
## 11250K .......... .......... .......... .......... .......... 37% 2.17M 11s
## 11300K .......... .......... .......... .......... .......... 37% 2.42M 11s
## 11350K .......... .......... .......... .......... .......... 37% 1.98M 11s
## 11400K .......... .......... .......... .......... .......... 37% 2.40M 11s
## 11450K .......... .......... .......... .......... .......... 38% 2.02M 11s
## 11500K .......... .......... .......... .......... .......... 38% 1.81M 11s
## 11550K .......... .......... .......... .......... .......... 38% 2.17M 11s
## 11600K .......... .......... .......... .......... .......... 38% 2.13M 11s
## 11650K .......... .......... .......... .......... .......... 38% 1.86M 11s
## 11700K .......... .......... .......... .......... .......... 38% 2.28M 11s
## 11750K .......... .......... .......... .......... .......... 39% 2.44M 11s
## 11800K .......... .......... .......... .......... .......... 39% 1.42M 11s
## 11850K .......... .......... .......... .......... .......... 39% 2.18M 11s
## 11900K .......... .......... .......... .......... .......... 39% 3.81M 11s
## 11950K .......... .......... .......... .......... .......... 39% 1.71M 11s
## 12000K .......... .......... .......... .......... .......... 39% 2.24M 11s
## 12050K .......... .......... .......... .......... .......... 40% 2.24M 10s
## 12100K .......... .......... .......... .......... .......... 40% 2.07M 10s
## 12150K .......... .......... .......... .......... .......... 40% 2.35M 10s
## 12200K .......... .......... .......... .......... .......... 40% 2.03M 10s
## 12250K .......... .......... .......... .......... .......... 40% 1.82M 10s
## 12300K .......... .......... .......... .......... .......... 40% 2.10M 10s
## 12350K .......... .......... .......... .......... .......... 40% 2.03M 10s
## 12400K .......... .......... .......... .......... .......... 41% 2.34M 10s
## 12450K .......... .......... .......... .......... .......... 41% 1.86M 10s
## 12500K .......... .......... .......... .......... .......... 41% 1.86M 10s
## 12550K .......... .......... .......... .......... .......... 41% 2.41M 10s
## 12600K .......... .......... .......... .......... .......... 41% 2.29M 10s
## 12650K .......... .......... .......... .......... .......... 41% 2.42M 10s
## 12700K .......... .......... .......... .......... .......... 42% 2.20M 10s
## 12750K .......... .......... .......... .......... .......... 42% 1.40M 10s
## 12800K .......... .......... .......... .......... .......... 42% 1.57M 10s
## 12850K .......... .......... .......... .......... .......... 42% 5.68M 10s
## 12900K .......... .......... .......... .......... .......... 42% 2.17M 10s
## 12950K .......... .......... .......... .......... .......... 42% 1.43M 10s
## 13000K .......... .......... .......... .......... .......... 43% 2.91M 10s
## 13050K .......... .......... .......... .......... .......... 43% 2.62M 10s
## 13100K .......... .......... .......... .......... .......... 43% 1.89M 10s
## 13150K .......... .......... .......... .......... .......... 43% 722K 10s
## 13200K .......... .......... .......... .......... .......... 43% 4.72M 10s
## 13250K .......... .......... .......... .......... .......... 43% 2.76M 10s
## 13300K .......... .......... .......... .......... .......... 44% 2.01M 10s
## 13350K .......... .......... .......... .......... .......... 44% 2.09M 10s
## 13400K .......... .......... .......... .......... .......... 44% 1.68M 10s
## 13450K .......... .......... .......... .......... .......... 44% 3.47M 10s
## 13500K .......... .......... .......... .......... .......... 44% 2.24M 9s
## 13550K .......... .......... .......... .......... .......... 44% 1.15M 9s
## 13600K .......... .......... .......... .......... .......... 45% 1.08M 9s
## 13650K .......... .......... .......... .......... .......... 45% 4.68M 9s
## 13700K .......... .......... .......... .......... .......... 45% 2.51M 9s
## 13750K .......... .......... .......... .......... .......... 45% 2.17M 9s
## 13800K .......... .......... .......... .......... .......... 45% 2.54M 9s
## 13850K .......... .......... .......... .......... .......... 45% 2.15M 9s
## 13900K .......... .......... .......... .......... .......... 46% 2.12M 9s
## 13950K .......... .......... .......... .......... .......... 46% 1.65M 9s
## 14000K .......... .......... .......... .......... .......... 46% 2.17M 9s
## 14050K .......... .......... .......... .......... .......... 46% 2.15M 9s
## 14100K .......... .......... .......... .......... .......... 46% 2.37M 9s
## 14150K .......... .......... .......... .......... .......... 46% 2.14M 9s
## 14200K .......... .......... .......... .......... .......... 47% 2.01M 9s
## 14250K .......... .......... .......... .......... .......... 47% 1.96M 9s
## 14300K .......... .......... .......... .......... .......... 47% 1.32M 9s
## 14350K .......... .......... .......... .......... .......... 47% 1.24M 9s
## 14400K .......... .......... .......... .......... .......... 47% 3.21M 9s
## 14450K .......... .......... .......... .......... .......... 47% 2.76M 9s
## 14500K .......... .......... .......... .......... .......... 48% 1.57M 9s
## 14550K .......... .......... .......... .......... .......... 48% 1.92M 9s
## 14600K .......... .......... .......... .......... .......... 48% 1.58M 9s
## 14650K .......... .......... .......... .......... .......... 48% 1.46M 9s
## 14700K .......... .......... .......... .......... .......... 48% 9.03M 9s
## 14750K .......... .......... .......... .......... .......... 48% 2.32M 9s
## 14800K .......... .......... .......... .......... .......... 49% 911K 9s
## 14850K .......... .......... .......... .......... .......... 49% 5.20M 9s
## 14900K .......... .......... .......... .......... .......... 49% 5.11M 9s
## 14950K .......... .......... .......... .......... .......... 49% 1.92M 9s
## 15000K .......... .......... .......... .......... .......... 49% 778K 9s
## 15050K .......... .......... .......... .......... .......... 49% 7.76M 9s
## 15100K .......... .......... .......... .......... .......... 50% 5.00M 8s
## 15150K .......... .......... .......... .......... .......... 50% 4.21M 8s
## 15200K .......... .......... .......... .......... .......... 50% 1.68M 8s
## 15250K .......... .......... .......... .......... .......... 50% 6.65M 8s
## 15300K .......... .......... .......... .......... .......... 50% 1.71M 8s
## 15350K .......... .......... .......... .......... .......... 50% 2.36M 8s
## 15400K .......... .......... .......... .......... .......... 51% 1.17M 8s
## 15450K .......... .......... .......... .......... .......... 51% 1.78M 8s
## 15500K .......... .......... .......... .......... .......... 51% 1.86M 8s
## 15550K .......... .......... .......... .......... .......... 51% 4.83M 8s
## 15600K .......... .......... .......... .......... .......... 51% 2.88M 8s
## 15650K .......... .......... .......... .......... .......... 51% 1.92M 8s
## 15700K .......... .......... .......... .......... .......... 52% 3.36M 8s
## 15750K .......... .......... .......... .......... .......... 52% 1.60M 8s
## 15800K .......... .......... .......... .......... .......... 52% 3.18M 8s
## 15850K .......... .......... .......... .......... .......... 52% 2.00M 8s
## 15900K .......... .......... .......... .......... .......... 52% 2.09M 8s
## 15950K .......... .......... .......... .......... .......... 52% 1.39M 8s
## 16000K .......... .......... .......... .......... .......... 53% 2.88M 8s
## 16050K .......... .......... .......... .......... .......... 53% 1.92M 8s
## 16100K .......... .......... .......... .......... .......... 53% 1.51M 8s
## 16150K .......... .......... .......... .......... .......... 53% 1.59M 8s
## 16200K .......... .......... .......... .......... .......... 53% 1.48M 8s
## 16250K .......... .......... .......... .......... .......... 53% 1.82M 8s
## 16300K .......... .......... .......... .......... .......... 54% 1.41M 8s
## 16350K .......... .......... .......... .......... .......... 54% 3.79M 8s
## 16400K .......... .......... .......... .......... .......... 54% 6.13M 8s
## 16450K .......... .......... .......... .......... .......... 54% 2.00M 8s
## 16500K .......... .......... .......... .......... .......... 54% 1.49M 8s
## 16550K .......... .......... .......... .......... .......... 54% 1.23M 8s
## 16600K .......... .......... .......... .......... .......... 55% 30.2M 8s
## 16650K .......... .......... .......... .......... .......... 55% 3.91M 7s
## 16700K .......... .......... .......... .......... .......... 55% 2.53M 7s
## 16750K .......... .......... .......... .......... .......... 55% 1.79M 7s
## 16800K .......... .......... .......... .......... .......... 55% 1.59M 7s
## 16850K .......... .......... .......... .......... .......... 55% 1.43M 7s
## 16900K .......... .......... .......... .......... .......... 56% 2.95M 7s
## 16950K .......... .......... .......... .......... .......... 56% 958K 7s
## 17000K .......... .......... .......... .......... .......... 56% 11.2M 7s
## 17050K .......... .......... .......... .......... .......... 56% 5.95M 7s
## 17100K .......... .......... .......... .......... .......... 56% 5.37M 7s
## 17150K .......... .......... .......... .......... .......... 56% 1.39M 7s
## 17200K .......... .......... .......... .......... .......... 57% 1.68M 7s
## 17250K .......... .......... .......... .......... .......... 57% 1.82M 7s
## 17300K .......... .......... .......... .......... .......... 57% 1.84M 7s
## 17350K .......... .......... .......... .......... .......... 57% 992K 7s
## 17400K .......... .......... .......... .......... .......... 57% 1.70M 7s
## 17450K .......... .......... .......... .......... .......... 57% 6.29M 7s
## 17500K .......... .......... .......... .......... .......... 58% 4.39M 7s
## 17550K .......... .......... .......... .......... .......... 58% 2.00M 7s
## 17600K .......... .......... .......... .......... .......... 58% 3.05M 7s
## 17650K .......... .......... .......... .......... .......... 58% 2.23M 7s
## 17700K .......... .......... .......... .......... .......... 58% 2.18M 7s
## 17750K .......... .......... .......... .......... .......... 58% 1.40M 7s
## 17800K .......... .......... .......... .......... .......... 59% 1.46M 7s
## 17850K .......... .......... .......... .......... .......... 59% 2.14M 7s
## 17900K .......... .......... .......... .......... .......... 59% 3.26M 7s
## 17950K .......... .......... .......... .......... .......... 59% 1.41M 7s
## 18000K .......... .......... .......... .......... .......... 59% 3.25M 7s
## 18050K .......... .......... .......... .......... .......... 59% 160K 7s
## 18100K .......... .......... .......... .......... .......... 60% 39.9M 7s
## 18150K .......... .......... .......... .......... .......... 60% 41.5M 7s
## 18200K .......... .......... .......... .......... .......... 60% 50.4M 7s
## 18250K .......... .......... .......... .......... .......... 60% 55.6M 7s
## 18300K .......... .......... .......... .......... .......... 60% 51.4M 7s
## 18350K .......... .......... .......... .......... .......... 60% 45.4M 7s
## 18400K .......... .......... .......... .......... .......... 61% 46.0M 7s
## 18450K .......... .......... .......... .......... .......... 61% 57.4M 6s
## 18500K .......... .......... .......... .......... .......... 61% 55.5M 6s
## 18550K .......... .......... .......... .......... .......... 61% 51.7M 6s
## 18600K .......... .......... .......... .......... .......... 61% 55.8M 6s
## 18650K .......... .......... .......... .......... .......... 61% 50.2M 6s
## 18700K .......... .......... .......... .......... .......... 61% 49.3M 6s
## 18750K .......... .......... .......... .......... .......... 62% 1.78M 6s
## 18800K .......... .......... .......... .......... .......... 62% 2.21M 6s
## 18850K .......... .......... .......... .......... .......... 62% 1.84M 6s
## 18900K .......... .......... .......... .......... .......... 62% 2.19M 6s
## 18950K .......... .......... .......... .......... .......... 62% 2.48M 6s
## 19000K .......... .......... .......... .......... .......... 62% 2.12M 6s
## 19050K .......... .......... .......... .......... .......... 63% 2.45M 6s
## 19100K .......... .......... .......... .......... .......... 63% 1.93M 6s
## 19150K .......... .......... .......... .......... .......... 63% 1.50M 6s
## 19200K .......... .......... .......... .......... .......... 63% 2.35M 6s
## 19250K .......... .......... .......... .......... .......... 63% 1.48M 6s
## 19300K .......... .......... .......... .......... .......... 63% 3.58M 6s
## 19350K .......... .......... .......... .......... .......... 64% 2.13M 6s
## 19400K .......... .......... .......... .......... .......... 64% 2.63M 6s
## 19450K .......... .......... .......... .......... .......... 64% 1.99M 6s
## 19500K .......... .......... .......... .......... .......... 64% 2.34M 6s
## 19550K .......... .......... .......... .......... .......... 64% 1.67M 6s
## 19600K .......... .......... .......... .......... .......... 64% 2.27M 6s
## 19650K .......... .......... .......... .......... .......... 65% 2.06M 6s
## 19700K .......... .......... .......... .......... .......... 65% 2.11M 6s
## 19750K .......... .......... .......... .......... .......... 65% 1.98M 6s
## 19800K .......... .......... .......... .......... .......... 65% 1.83M 6s
## 19850K .......... .......... .......... .......... .......... 65% 3.05M 6s
## 19900K .......... .......... .......... .......... .......... 65% 2.31M 6s
## 19950K .......... .......... .......... .......... .......... 66% 1.53M 6s
## 20000K .......... .......... .......... .......... .......... 66% 2.17M 5s
## 20050K .......... .......... .......... .......... .......... 66% 2.43M 5s
## 20100K .......... .......... .......... .......... .......... 66% 2.23M 5s
## 20150K .......... .......... .......... .......... .......... 66% 1.34M 5s
## 20200K .......... .......... .......... .......... .......... 66% 4.22M 5s
## 20250K .......... .......... .......... .......... .......... 67% 1.83M 5s
## 20300K .......... .......... .......... .......... .......... 67% 2.47M 5s
## 20350K .......... .......... .......... .......... .......... 67% 1.88M 5s
## 20400K .......... .......... .......... .......... .......... 67% 1.82M 5s
## 20450K .......... .......... .......... .......... .......... 67% 2.04M 5s
## 20500K .......... .......... .......... .......... .......... 67% 1.94M 5s
## 20550K .......... .......... .......... .......... .......... 68% 2.55M 5s
## 20600K .......... .......... .......... .......... .......... 68% 2.22M 5s
## 20650K .......... .......... .......... .......... .......... 68% 2.41M 5s
## 20700K .......... .......... .......... .......... .......... 68% 1.75M 5s
## 20750K .......... .......... .......... .......... .......... 68% 2.31M 5s
## 20800K .......... .......... .......... .......... .......... 68% 2.20M 5s
## 20850K .......... .......... .......... .......... .......... 69% 1.94M 5s
## 20900K .......... .......... .......... .......... .......... 69% 2.30M 5s
## 20950K .......... .......... .......... .......... .......... 69% 2.24M 5s
## 21000K .......... .......... .......... .......... .......... 69% 2.01M 5s
## 21050K .......... .......... .......... .......... .......... 69% 2.29M 5s
## 21100K .......... .......... .......... .......... .......... 69% 2.32M 5s
## 21150K .......... .......... .......... .......... .......... 70% 1.62M 5s
## 21200K .......... .......... .......... .......... .......... 70% 1.83M 5s
## 21250K .......... .......... .......... .......... .......... 70% 1.85M 5s
## 21300K .......... .......... .......... .......... .......... 70% 3.33M 5s
## 21350K .......... .......... .......... .......... .......... 70% 1.75M 5s
## 21400K .......... .......... .......... .......... .......... 70% 3.09M 5s
## 21450K .......... .......... .......... .......... .......... 71% 2.06M 5s
## 21500K .......... .......... .......... .......... .......... 71% 2.18M 5s
## 21550K .......... .......... .......... .......... .......... 71% 1.58M 5s
## 21600K .......... .......... .......... .......... .......... 71% 2.22M 5s
## 21650K .......... .......... .......... .......... .......... 71% 2.11M 5s
## 21700K .......... .......... .......... .......... .......... 71% 1.51M 5s
## 21750K .......... .......... .......... .......... .......... 72% 4.24M 5s
## 21800K .......... .......... .......... .......... .......... 72% 2.08M 4s
## 21850K .......... .......... .......... .......... .......... 72% 2.37M 4s
## 21900K .......... .......... .......... .......... .......... 72% 2.13M 4s
## 21950K .......... .......... .......... .......... .......... 72% 1.54M 4s
## 22000K .......... .......... .......... .......... .......... 72% 1.78M 4s
## 22050K .......... .......... .......... .......... .......... 73% 2.93M 4s
## 22100K .......... .......... .......... .......... .......... 73% 2.12M 4s
## 22150K .......... .......... .......... .......... .......... 73% 807K 4s
## 22200K .......... .......... .......... .......... .......... 73% 2.87M 4s
## 22250K .......... .......... .......... .......... .......... 73% 30.1M 4s
## 22300K .......... .......... .......... .......... .......... 73% 3.64M 4s
## 22350K .......... .......... .......... .......... .......... 74% 1.39M 4s
## 22400K .......... .......... .......... .......... .......... 74% 2.86M 4s
## 22450K .......... .......... .......... .......... .......... 74% 1.09M 4s
## 22500K .......... .......... .......... .......... .......... 74% 1.73M 4s
## 22550K .......... .......... .......... .......... .......... 74% 1.97M 4s
## 22600K .......... .......... .......... .......... .......... 74% 2.55M 4s
## 22650K .......... .......... .......... .......... .......... 75% 4.55M 4s
## 22700K .......... .......... .......... .......... .......... 75% 6.48M 4s
## 22750K .......... .......... .......... .......... .......... 75% 1.70M 4s
## 22800K .......... .......... .......... .......... .......... 75% 2.18M 4s
## 22850K .......... .......... .......... .......... .......... 75% 1.12M 4s
## 22900K .......... .......... .......... .......... .......... 75% 5.77M 4s
## 22950K .......... .......... .......... .......... .......... 76% 2.80M 4s
## 23000K .......... .......... .......... .......... .......... 76% 2.55M 4s
## 23050K .......... .......... .......... .......... .......... 76% 2.18M 4s
## 23100K .......... .......... .......... .......... .......... 76% 2.01M 4s
## 23150K .......... .......... .......... .......... .......... 76% 2.02M 4s
## 23200K .......... .......... .......... .......... .......... 76% 1.88M 4s
## 23250K .......... .......... .......... .......... .......... 77% 2.15M 4s
## 23300K .......... .......... .......... .......... .......... 77% 1.71M 4s
## 23350K .......... .......... .......... .......... .......... 77% 3.25M 4s
## 23400K .......... .......... .......... .......... .......... 77% 1.95M 4s
## 23450K .......... .......... .......... .......... .......... 77% 1.96M 4s
## 23500K .......... .......... .......... .......... .......... 77% 2.88M 4s
## 23550K .......... .......... .......... .......... .......... 78% 1.71M 4s
## 23600K .......... .......... .......... .......... .......... 78% 2.22M 3s
## 23650K .......... .......... .......... .......... .......... 78% 2.24M 3s
## 23700K .......... .......... .......... .......... .......... 78% 1.57M 3s
## 23750K .......... .......... .......... .......... .......... 78% 2.87M 3s
## 23800K .......... .......... .......... .......... .......... 78% 2.07M 3s
## 23850K .......... .......... .......... .......... .......... 79% 2.40M 3s
## 23900K .......... .......... .......... .......... .......... 79% 1.79M 3s
## 23950K .......... .......... .......... .......... .......... 79% 1.09M 3s
## 24000K .......... .......... .......... .......... .......... 79% 8.56M 3s
## 24050K .......... .......... .......... .......... .......... 79% 2.56M 3s
## 24100K .......... .......... .......... .......... .......... 79% 1.66M 3s
## 24150K .......... .......... .......... .......... .......... 80% 2.84M 3s
## 24200K .......... .......... .......... .......... .......... 80% 2.34M 3s
## 24250K .......... .......... .......... .......... .......... 80% 1.81M 3s
## 24300K .......... .......... .......... .......... .......... 80% 2.57M 3s
## 24350K .......... .......... .......... .......... .......... 80% 1.63M 3s
## 24400K .......... .......... .......... .......... .......... 80% 2.17M 3s
## 24450K .......... .......... .......... .......... .......... 81% 1.95M 3s
## 24500K .......... .......... .......... .......... .......... 81% 2.76M 3s
## 24550K .......... .......... .......... .......... .......... 81% 1.77M 3s
## 24600K .......... .......... .......... .......... .......... 81% 1.76M 3s
## 24650K .......... .......... .......... .......... .......... 81% 3.31M 3s
## 24700K .......... .......... .......... .......... .......... 81% 2.07M 3s
## 24750K .......... .......... .......... .......... .......... 81% 1.60M 3s
## 24800K .......... .......... .......... .......... .......... 82% 1.56M 3s
## 24850K .......... .......... .......... .......... .......... 82% 2.96M 3s
## 24900K .......... .......... .......... .......... .......... 82% 3.47M 3s
## 24950K .......... .......... .......... .......... .......... 82% 1.84M 3s
## 25000K .......... .......... .......... .......... .......... 82% 2.72M 3s
## 25050K .......... .......... .......... .......... .......... 82% 1.52M 3s
## 25100K .......... .......... .......... .......... .......... 83% 2.78M 3s
## 25150K .......... .......... .......... .......... .......... 83% 995K 3s
## 25200K .......... .......... .......... .......... .......... 83% 11.6M 3s
## 25250K .......... .......... .......... .......... .......... 83% 3.03M 3s
## 25300K .......... .......... .......... .......... .......... 83% 1.85M 3s
## 25350K .......... .......... .......... .......... .......... 83% 2.56M 3s
## 25400K .......... .......... .......... .......... .......... 84% 1.87M 3s
## 25450K .......... .......... .......... .......... .......... 84% 2.32M 2s
## 25500K .......... .......... .......... .......... .......... 84% 1.99M 2s
## 25550K .......... .......... .......... .......... .......... 84% 1.24M 2s
## 25600K .......... .......... .......... .......... .......... 84% 5.88M 2s
## 25650K .......... .......... .......... .......... .......... 84% 1.60M 2s
## 25700K .......... .......... .......... .......... .......... 85% 2.51M 2s
## 25750K .......... .......... .......... .......... .......... 85% 1.52M 2s
## 25800K .......... .......... .......... .......... .......... 85% 5.25M 2s
## 25850K .......... .......... .......... .......... .......... 85% 2.36M 2s
## 25900K .......... .......... .......... .......... .......... 85% 1.02M 2s
## 25950K .......... .......... .......... .......... .......... 85% 389K 2s
## 26000K .......... .......... .......... .......... .......... 86% 99.6K 2s
## 26050K .......... .......... .......... .......... .......... 86% 29.1M 2s
## 26100K .......... .......... .......... .......... .......... 86% 27.9M 2s
## 26150K .......... .......... .......... .......... .......... 86% 26.3M 2s
## 26200K .......... .......... .......... .......... .......... 86% 32.1M 2s
## 26250K .......... .......... .......... .......... .......... 86% 33.9M 2s
## 26300K .......... .......... .......... .......... .......... 87% 33.6M 2s
## 26350K .......... .......... .......... .......... .......... 87% 27.9M 2s
## 26400K .......... .......... .......... .......... .......... 87% 38.5M 2s
## 26450K .......... .......... .......... .......... .......... 87% 40.5M 2s
## 26500K .......... .......... .......... .......... .......... 87% 47.1M 2s
## 26550K .......... .......... .......... .......... .......... 87% 47.7M 2s
## 26600K .......... .......... .......... .......... .......... 88% 44.6M 2s
## 26650K .......... .......... .......... .......... .......... 88% 51.5M 2s
## 26700K .......... .......... .......... .......... .......... 88% 10.4M 2s
## 26750K .......... .......... .......... .......... .......... 88% 173K 2s
## 26800K .......... .......... .......... .......... .......... 88% 30.3M 2s
## 26850K .......... .......... .......... .......... .......... 88% 37.2M 2s
## 26900K .......... .......... .......... .......... .......... 89% 41.1M 2s
## 26950K .......... .......... .......... .......... .......... 89% 40.1M 2s
## 27000K .......... .......... .......... .......... .......... 89% 45.3M 2s
## 27050K .......... .......... .......... .......... .......... 89% 43.8M 2s
## 27100K .......... .......... .......... .......... .......... 89% 50.0M 2s
## 27150K .......... .......... .......... .......... .......... 89% 47.3M 2s
## 27200K .......... .......... .......... .......... .......... 90% 3.32M 2s
## 27250K .......... .......... .......... .......... .......... 90% 2.36M 2s
## 27300K .......... .......... .......... .......... .......... 90% 2.50M 2s
## 27350K .......... .......... .......... .......... .......... 90% 1.95M 2s
## 27400K .......... .......... .......... .......... .......... 90% 1.47M 1s
## 27450K .......... .......... .......... .......... .......... 90% 1.32M 1s
## 27500K .......... .......... .......... .......... .......... 91% 1.04M 1s
## 27550K .......... .......... .......... .......... .......... 91% 4.77M 1s
## 27600K .......... .......... .......... .......... .......... 91% 2.27M 1s
## 27650K .......... .......... .......... .......... .......... 91% 2.76M 1s
## 27700K .......... .......... .......... .......... .......... 91% 1.83M 1s
## 27750K .......... .......... .......... .......... .......... 91% 2.48M 1s
## 27800K .......... .......... .......... .......... .......... 92% 2.33M 1s
## 27850K .......... .......... .......... .......... .......... 92% 1.73M 1s
## 27900K .......... .......... .......... .......... .......... 92% 3.16M 1s
## 27950K .......... .......... .......... .......... .......... 92% 1.70M 1s
## 28000K .......... .......... .......... .......... .......... 92% 2.24M 1s
## 28050K .......... .......... .......... .......... .......... 92% 2.08M 1s
## 28100K .......... .......... .......... .......... .......... 93% 1.70M 1s
## 28150K .......... .......... .......... .......... .......... 93% 1.91M 1s
## 28200K .......... .......... .......... .......... .......... 93% 2.32M 1s
## 28250K .......... .......... .......... .......... .......... 93% 3.17M 1s
## 28300K .......... .......... .......... .......... .......... 93% 1.63M 1s
## 28350K .......... .......... .......... .......... .......... 93% 1.76M 1s
## 28400K .......... .......... .......... .......... .......... 94% 2.19M 1s
## 28450K .......... .......... .......... .......... .......... 94% 2.38M 1s
## 28500K .......... .......... .......... .......... .......... 94% 1.51M 1s
## 28550K .......... .......... .......... .......... .......... 94% 3.39M 1s
## 28600K .......... .......... .......... .......... .......... 94% 1.85M 1s
## 28650K .......... .......... .......... .......... .......... 94% 3.50M 1s
## 28700K .......... .......... .......... .......... .......... 95% 2.12M 1s
## 28750K .......... .......... .......... .......... .......... 95% 1.65M 1s
## 28800K .......... .......... .......... .......... .......... 95% 2.24M 1s
## 28850K .......... .......... .......... .......... .......... 95% 2.21M 1s
## 28900K .......... .......... .......... .......... .......... 95% 2.22M 1s
## 28950K .......... .......... .......... .......... .......... 95% 1.69M 1s
## 29000K .......... .......... .......... .......... .......... 96% 2.68M 1s
## 29050K .......... .......... .......... .......... .......... 96% 2.24M 1s
## 29100K .......... .......... .......... .......... .......... 96% 2.08M 1s
## 29150K .......... .......... .......... .......... .......... 96% 1.38M 1s
## 29200K .......... .......... .......... .......... .......... 96% 2.78M 1s
## 29250K .......... .......... .......... .......... .......... 96% 1.63M 1s
## 29300K .......... .......... .......... .......... .......... 97% 4.29M 0s
## 29350K .......... .......... .......... .......... .......... 97% 1.67M 0s
## 29400K .......... .......... .......... .......... .......... 97% 1.94M 0s
## 29450K .......... .......... .......... .......... .......... 97% 2.73M 0s
## 29500K .......... .......... .......... .......... .......... 97% 2.49M 0s
## 29550K .......... .......... .......... .......... .......... 97% 1.68M 0s
## 29600K .......... .......... .......... .......... .......... 98% 1.96M 0s
## 29650K .......... .......... .......... .......... .......... 98% 2.49M 0s
## 29700K .......... .......... .......... .......... .......... 98% 2.18M 0s
## 29750K .......... .......... .......... .......... .......... 98% 2.28M 0s
## 29800K .......... .......... .......... .......... .......... 98% 2.13M 0s
## 29850K .......... .......... .......... .......... .......... 98% 1.58M 0s
## 29900K .......... .......... .......... .......... .......... 99% 2.88M 0s
## 29950K .......... .......... .......... .......... .......... 99% 1.83M 0s
## 30000K .......... .......... .......... .......... .......... 99% 2.07M 0s
## 30050K .......... .......... .......... .......... .......... 99% 2.00M 0s
## 30100K .......... .......... .......... .......... .......... 99% 1.81M 0s
## 30150K .......... .......... .......... .......... .......... 99% 1.93M 0s
## 30200K .......... .......... .......... .......... ... 100% 5.33M=16s
##
## 2016-03-02 06:22:39 (1.85 MB/s) - 'gg_13_5_otus.tar.gz' saved [30969823/30969823]
##
## x ./gg_13_5_otus/
## x ./gg_13_5_otus/97_otu_taxonomy.txt
## x ./gg_13_5_otus/97_otus.fasta
Add the taxa outputted to the OTU table with the column header “taxonomy” and output the resulting file to biom format:
awk 'NR==1; NR > 1 {print $0 | "sort"}' dairybreeds.otu_table.txt > dairybreeds.otu_table.sort.txt
sort assigned_gg_taxa/dairybreeds.otus2_tax_assignments.txt > assigned_gg_taxa/dairybreeds.otus2_tax_assignments.sort.txt
{ printf '\ttaxonomy\t\t\n'; cat assigned_gg_taxa/dairybreeds.otus2_tax_assignments.sort.txt ; } > assigned_gg_taxa/dairybreeds.otus2_tax_assignments.sort.label.txt
awk '!/2496/{print}' assigned_gg_taxa/dairybreeds.otus2_tax_assignments.sort.label.txt > assigned_gg_taxa/dairybreeds.otus2_tax_assignments.sort.label2.txt
paste dairybreeds.otu_table.sort.txt <(cut -f 2 assigned_gg_taxa/dairybreeds.otus2_tax_assignments.sort.label2.txt) > dairybreeds.otu_table.tax.txt
rm dairybreeds.otu_table.sort.txt
biom convert --table-type "OTU table" -i dairybreeds.otu_table.tax.txt -o dairybreeds.otu_table.tax.biom --process-obs-metadata taxonomy --to-json
Remove sample x106(Jersey cow). Sample was not obtained from this cow.
printf "x106" > remove_sample.txt
filter_samples_from_otu_table.py -i dairybreeds.otu_table.tax.biom -o dairybreeds.otu_table.tax.filtered.biom --sample_id_fp remove_sample.txt --negate_sample_id_fp
Sequences were aligned using the RDP aligner. The aligned_dairybreeds.otus2.fasta_alignment_summary.txt obtained from the alignment is provided. This file was used to assess OTUs that did not aligned properly based on the following criteria:
Output files from the RDP aligner are provided.
wget https://raw.githubusercontent.com/enriquepaz/2016_Paz_et_al_Dairy_Breeds/master/aligned_dairybreeds.otus2.fasta
wget https://raw.githubusercontent.com/enriquepaz/2016_Paz_et_al_Dairy_Breeds/master/aligned_dairybreeds.otus2.fasta_alignment_summary.txt
Make a file containing all the OTUs that aligned properly
alignedOTUs <- read.table("aligned_dairybreeds.otus2.fasta_alignment_summary.txt",
header = T, sep = "\t")
properOTUtable <- subset(alignedOTUs, (Start >= 324 & Start <= 354) & End ==
454, select = SequenceId)
write.table(properOTUtable, file = "proper_aligned_otu.txt", col.names = F,
row.names = F)
Remove those OTUs that did not align well from the OTU table. Then remove OTUs with Cyanobacteria classification. UPARSE should have removed sinlgeton OTUs, but double check with the (-n 2 parameter):
filter_otus_from_otu_table.py -i dairybreeds.otu_table.tax.filtered.biom -o dairybreeds.otu_table.tax.filtered.filtered.biom --negate_ids_to_exclude -e proper_aligned_otu.txt -n 2
filter_taxa_from_otu_table.py -i dairybreeds.otu_table.tax.filtered.filtered.biom -o dairybreeds.otu_table.tax.final.biom -n p__Cyanobacteria
Generate quality filtered OTU table with assigend taxonomy
biom convert -i dairybreeds.otu_table.tax.final.biom -o Table_OTUs.txt --to-tsv --header-key taxonomy
otus_data <- read.table("Table_OTUs.txt", header = F, sep = "\t")
colnames(otus_data) <- c("OTU ID", "Holstein5", "Holstein3", "Jersey3", "Holstein1",
"Jersey4", "Holstein3C", "Holstein2", "Holstein4C", "Holstein4", "Holstein5C",
"Jersey1", "Holstein2C", "Jersey2", "Holstein1C", "Taxonomy")
otus_data_arranged <- otus_data[c("OTU ID", "Holstein1", "Holstein2", "Holstein3",
"Holstein4", "Holstein5", "Holstein1C", "Holstein2C", "Holstein3C", "Holstein4C",
"Holstein5C", "Jersey1", "Jersey2", "Jersey3", "Jersey4", "Taxonomy")]
write.table(otus_data_arranged, sep = "\t", file = "TableS3.txt", col.names = T,
row.names = F)
Use the aligned file to generate a phylogenetic tree using clearcut in mothur. Note that using the unfiltered aligned file does not affect downstream results. Clearcut requires ID lengths greater than ~10 characters, thus add 10 ’A’s to the front of all sequence names. Then remove the ’A’s after the tree is formed.
sed -i -e 's/>/>AAAAAAAAAA/g' aligned_dairybreeds.otus2.fasta
mothur "#dist.seqs(fasta=aligned_dairybreeds.otus2.fasta, output=lt)"
mothur "#clearcut(phylip=aligned_dairybreeds.otus2.phylip.dist)"
sed -i -e 's/AAAAAAAAAA//g' aligned_dairybreeds.otus2.phylip.tre
## [H[2J
##
##
##
##
##
## mothur v.1.36.1
## Last updated: 7/27/2015
##
## by
## Patrick D. Schloss
##
## Department of Microbiology & Immunology
## University of Michigan
## pschloss@umich.edu
## http://www.mothur.org
##
## When using, please cite:
## Schloss, P.D., et al., Introducing mothur: Open-source, platform-independent, community-supported software for describing and comparing microbial communities. Appl Environ Microbiol, 2009. 75(23):7537-41.
##
## Distributed under the GNU General Public License
##
## Type 'help()' for information on the commands that are available
##
## Type 'quit()' to exit program
##
##
##
## mothur > dist.seqs(fasta=aligned_dairybreeds.otus2.fasta, output=lt)
## [WARNING]: We found more than 25% of the bases in sequence AAAAAAAAAA#=GC_RF to be ambiguous. Mothur is not setup to process protein sequences.
##
## Using 1 processors.
## 0 0
## 100 0
## 200 0
## 300 0
## 400 0
## 500 1
## 600 1
## 700 1
## 800 1
## 900 1
## 1000 1
## 1100 2
## 1200 2
## 1300 2
## 1400 2
## 1500 3
## 1600 3
## 1700 3
## 1800 4
## 1900 4
## 2000 5
## 2100 5
## 2200 5
## 2300 6
## 2400 6
## 2500 7
## 2600 7
## 2700 8
## 2800 9
## 2900 9
## 3000 10
## 3100 10
## 3200 11
## 3249 11
##
## Output File Names:
## aligned_dairybreeds.otus2.phylip.dist
##
## It took 11 seconds to calculate the distances for 3250 sequences.
##
## mothur > quit()
## [H[2J
##
##
##
##
##
## mothur v.1.36.1
## Last updated: 7/27/2015
##
## by
## Patrick D. Schloss
##
## Department of Microbiology & Immunology
## University of Michigan
## pschloss@umich.edu
## http://www.mothur.org
##
## When using, please cite:
## Schloss, P.D., et al., Introducing mothur: Open-source, platform-independent, community-supported software for describing and comparing microbial communities. Appl Environ Microbiol, 2009. 75(23):7537-41.
##
## Distributed under the GNU General Public License
##
## Type 'help()' for information on the commands that are available
##
## Type 'quit()' to exit program
##
##
##
## mothur > clearcut(phylip=aligned_dairybreeds.otus2.phylip.dist)
##
## Output File Names:
## aligned_dairybreeds.otus2.phylip.tre
##
##
## mothur > quit()
Rarefy quality filtered OTU table to the lowest depth across samples and compute alpha diversity metrics and generate rarefaction plots. Note from QIIME: “If the lines for some categories do not extend all the way to the right end of the x-axis, that means that at least one of the samples in that category does not have that many sequences.”
biom summarize-table -i dairybreeds.otu_table.tax.final.biom -o dairybreeds.otu_table.tax.final.summary.txt
single_rarefaction.py -i dairybreeds.otu_table.tax.final.biom -d 12141 -o dairybreeds.otu_table_rarefied.biom
wget https://raw.githubusercontent.com/enriquepaz/2016_Paz_et_al_Dairy_Breeds/master/dairybreeds.otu_table_rarefied.biom
## --2016-03-02 06:24:04-- https://raw.githubusercontent.com/enriquepaz/2016_Paz_et_al_Dairy_Breeds/master/dairybreeds.otu_table_rarefied.biom
## Resolving raw.githubusercontent.com... 23.235.44.133
## Connecting to raw.githubusercontent.com|23.235.44.133|:443... connected.
## HTTP request sent, awaiting response... 200 OK
## Length: 906210 (885K) [application/octet-stream]
## Saving to: 'dairybreeds.otu_table_rarefied.biom'
##
## 0K .......... .......... .......... .......... .......... 5% 996K 1s
## 50K .......... .......... .......... .......... .......... 11% 1.16M 1s
## 100K .......... .......... .......... .......... .......... 16% 1.41M 1s
## 150K .......... .......... .......... .......... .......... 22% 1.37M 1s
## 200K .......... .......... .......... .......... .......... 28% 1.36M 1s
## 250K .......... .......... .......... .......... .......... 33% 1.54M 0s
## 300K .......... .......... .......... .......... .......... 39% 1.38M 0s
## 350K .......... .......... .......... .......... .......... 45% 1.16M 0s
## 400K .......... .......... .......... .......... .......... 50% 1.52M 0s
## 450K .......... .......... .......... .......... .......... 56% 1.45M 0s
## 500K .......... .......... .......... .......... .......... 62% 1.92M 0s
## 550K .......... .......... .......... .......... .......... 67% 1.59M 0s
## 600K .......... .......... .......... .......... .......... 73% 1.92M 0s
## 650K .......... .......... .......... .......... .......... 79% 1.13M 0s
## 700K .......... .......... .......... .......... .......... 84% 2.21M 0s
## 750K .......... .......... .......... .......... .......... 90% 1.32M 0s
## 800K .......... .......... .......... .......... .......... 96% 1.22M 0s
## 850K .......... .......... .......... .... 100% 4.46M=0.6s
##
## 2016-03-02 06:24:05 (1.43 MB/s) - 'dairybreeds.otu_table_rarefied.biom' saved [906210/906210]
alpha_diversity.py -i dairybreeds.otu_table_rarefied.biom -m chao1,observed_otus,goods_coverage -o alpha_rarefaction_even.txt
wget https://raw.githubusercontent.com/enriquepaz/2016_Paz_et_al_Dairy_Breeds/master/qiime_parameters.txt
alpha_rarefaction.py -i dairybreeds.otu_table_rarefied.biom -n 10 --min_rare_depth 1 -m mappingfile.txt -p qiime_parameters.txt -o alpha_rarefaction_plots_even
## --2016-03-02 06:24:07-- https://raw.githubusercontent.com/enriquepaz/2016_Paz_et_al_Dairy_Breeds/master/qiime_parameters.txt
## Resolving raw.githubusercontent.com... 23.235.44.133
## Connecting to raw.githubusercontent.com|23.235.44.133|:443... connected.
## HTTP request sent, awaiting response... 200 OK
## Length: 203 [text/plain]
## Saving to: 'qiime_parameters.txt'
##
## 0K 100% 8.07M=0s
##
## 2016-03-02 06:24:08 (8.07 MB/s) - 'qiime_parameters.txt' saved [203/203]
##
## /Users/henry/anaconda/envs/projectEnv/lib/python2.7/site-packages/matplotlib/collections.py:590: FutureWarning: elementwise comparison failed; returning scalar instead, but in the future will perform elementwise comparison
## if self._edgecolors == str('face'):
Generate alpha diversity charts
library(XML)
library(matrixStats)
## matrixStats v0.50.1 (2015-12-14) successfully loaded. See ?matrixStats for help.
##
## Attaching package: 'matrixStats'
## The following object is masked from 'package:plyr':
##
## count
rare_table <- readHTMLTable("alpha_rarefaction_plots_even/alpha_rarefaction_plots/rarefaction_plots.html")
rare_table$rare_data[rare_table$rare_data == "nan"] <- NA
alpha_rare <- na.omit(rare_table$rare_data)
colnames(alpha_rare)[2] <- "Seqs.Sample"
colnames(alpha_rare)[3] <- "chao1.Ave."
colnames(alpha_rare)[4] <- "chao1.Err."
colnames(alpha_rare)[5] <- "observed_otus.Ave."
colnames(alpha_rare)[6] <- "observed_otus.Err."
cols = c(2, 3, 4, 5, 6)
alpha_rare[, cols] <- lapply(cols, function(x) as.numeric(as.character(alpha_rare[,
x])))
breed_rare <- subset(alpha_rare, Description %in% c("Holstein", "HolsteinC",
"Jersey"))
method_rare <- subset(alpha_rare, Description %in% c("Cannula", "TubingH"))
breed_rare$label <- "Breed"
method_rare$label <- "Sampling Method"
alpha_rare <- rbind(breed_rare, method_rare)
pd <- position_dodge(width = 275)
rare_otu_plot <- ggplot(alpha_rare, aes(x = Seqs.Sample, y = observed_otus.Ave.,
colour = Description, group = Description, ymin = observed_otus.Ave. - observed_otus.Err.,
ymax = observed_otus.Ave. + observed_otus.Err.)) + geom_line(position = pd) +
geom_pointrange(position = pd) + scale_colour_manual(values = c("#FF0000",
"#0000FF", "#FFA500", "#006400", "#FF00FF"), limits = c("Holstein", "HolsteinC",
"Jersey", "Cannula", "TubingH"), labels = c("Holstein", "HolsteinC", "Jersey",
"Cannula", "Tubing")) + labs(x = "Sequences per Sample", y = "Mean Observed OTUs") +
theme(legend.title = element_blank(), axis.text = element_text(color = "black",
size = 8, face = "bold"), axis.title = element_text(color = "black",
size = 12, face = "bold"), legend.text = element_text(face = "bold"),
strip.text = element_text(size = 12, face = "bold")) + facet_grid(~label)
rare_chao1_plot <- ggplot(alpha_rare, aes(x = Seqs.Sample, y = chao1.Ave., colour = Description,
group = Description, ymin = chao1.Ave. - chao1.Err., ymax = chao1.Ave. +
chao1.Err.)) + geom_line(position = pd) + geom_pointrange(position = pd) +
scale_colour_manual(values = c("#FF0000", "#0000FF", "#FFA500", "#006400",
"#FF00FF"), limits = c("Holstein", "HolsteinC", "Jersey", "Cannula",
"TubingH"), labels = c("Holstein", "HolsteinC", "Jersey", "Cannula",
"Tubing")) + labs(x = "Sequences per Sample", y = "Mean Chao1") + theme(legend.title = element_blank(),
axis.text = element_text(color = "black", size = 8, face = "bold"), axis.title = element_text(color = "black",
size = 12, face = "bold"), legend.text = element_text(face = "bold"),
strip.text = element_text(size = 12, face = "bold")) + facet_grid(~label)
png("FigS2.png", units = "in", height = 6, width = 12, res = 300)
multiplot(rare_chao1_plot, rare_otu_plot, cols = 2)
dev.off()
## quartz_off_screen
## 2
pdf("FigS2.pdf", height = 6, width = 12)
multiplot(rare_chao1_plot, rare_otu_plot, cols = 2)
dev.off()
## quartz_off_screen
## 2
Perform a two-sided t-test to evaluate alpha diversity metrics
alpha <- read.table("alpha_rarefaction_even.txt", header = T, sep = "\t")
alpha_sub <- alpha[, c(2, 3)]
breed_data <- c("Holstein", "Holstein", "Jersey", "Holstein", "Jersey", "Holstein",
"HolsteinC", "HolsteinC", "Holstein", "HolsteinC", "Jersey", "HolsteinC",
"Jersey", "HolsteinC")
alpha_metrics <- data.frame(alpha_sub, breed_data)
breed_final <- subset(alpha_metrics, breed_data != "HolsteinC")
method_final <- subset(alpha_metrics, breed_data != "Jersey")
# Levene test Ho: Variances of breeds are equal
leveneTest(breed_final$chao1 ~ breed_final$breed_data)
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 1 0.002 0.9652
## 7
leveneTest(breed_final$observed_otus ~ breed_final$breed_data)
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 1 0.0058 0.9416
## 7
# T test Ho: alpha metrics Holstein = alpha metrics Jersey Two-sided t test
# default options used: mu = 0, alt = 'two.sided', conf = 0.95, paired = F
t.test(breed_final$chao1 ~ breed_final$breed_data, var.eq = T)
##
## Two Sample t-test
##
## data: breed_final$chao1 by breed_final$breed_data
## t = 3.1496, df = 7, p-value = 0.01616
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 73.34562 515.21409
## sample estimates:
## mean in group Holstein mean in group Jersey
## 1846.347 1552.067
t.test(breed_final$observed_otus ~ breed_final$breed_data, var.eq = T)
##
## Two Sample t-test
##
## data: breed_final$observed_otus by breed_final$breed_data
## t = 2.9867, df = 7, p-value = 0.02032
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 35.64569 306.65431
## sample estimates:
## mean in group Holstein mean in group Jersey
## 1343.40 1172.25
# Levene test Ho: Variances of methods are equal
leveneTest(method_final$chao1 ~ method_final$breed_data)
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 1 0.1348 0.723
## 8
leveneTest(method_final$observed_otus ~ method_final$breed_data)
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 1 0.2746 0.6145
## 8
# T test Ho: alpha metrics Tubing = alpha metrics Cannula Two-sided t test
# default options used: mu = 0, alt = 'two.sided', conf = 0.95, paired = F
t.test(method_final$chao1 ~ method_final$breed_data, var.eq = T)
##
## Two Sample t-test
##
## data: method_final$chao1 by method_final$breed_data
## t = 0.35397, df = 8, p-value = 0.7325
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -155.6428 212.0891
## sample estimates:
## mean in group Holstein mean in group HolsteinC
## 1846.347 1818.123
t.test(method_final$observed_otus ~ method_final$breed_data, var.eq = T)
##
## Two Sample t-test
##
## data: method_final$observed_otus by method_final$breed_data
## t = 0.38666, df = 8, p-value = 0.7091
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -92.32815 129.52815
## sample estimates:
## mean in group Holstein mean in group HolsteinC
## 1343.4 1324.8
Assign taxonomy and generate plots for desired taxa.
summarize_taxa.py -i dairybreeds.otu_table_rarefied.biom -o summarize_taxa -L 2,3,4,5,6,7
Generate a stacked bar chart for the phylum taxonomic rank across samples.
taxa_data <- read.table("summarize_taxa/dairybreeds.otu_table_rarefied_L2.txt",
header = F, sep = "\t")
colnames(taxa_data) <- c("Phylum", "Holstein5", "Holstein3", "Jersey3", "Holstein1",
"Jersey4", "Holstein2", "Holstein3C", "Holstein4C", "Holstein4", "Holstein5C",
"Jersey1", "Holstein2C", "Jersey2", "Holstein1C")
taxa_data2 <- taxa_data[c("Phylum", "Holstein1", "Holstein1C", "Holstein2",
"Holstein2C", "Holstein3", "Holstein3C", "Holstein4", "Holstein4C", "Holstein5",
"Holstein5C", "Jersey1", "Jersey2", "Jersey3", "Jersey4")]
taxa_data2$Phylum <- sub("k__", "", taxa_data2$Phylum)
taxa_data2$Phylum <- sub("Bacteria;", "", taxa_data2$Phylum)
taxa_data2$Phylum <- sub("p__", "", taxa_data2$Phylum)
taxa_data2$Phylum <- sub("Unassigned;Other", "Unassigned", taxa_data2$Phylum)
taxa_data2$Phylum <- sub("Other", "No Assigned Phylum", taxa_data2$Phylum)
plot_taxa_long <- gather(taxa_data2, Samples, Proportion, Holstein1:Jersey4)
png("FigS3.png", units = "in", height = 12, width = 17, res = 300)
ggplot(plot_taxa_long, aes(x = Samples, y = Proportion, fill = Phylum)) + geom_bar(stat = "identity") +
scale_fill_manual(values = c("#FF0000", "#0000FF", "#FFA500", "#556B2F",
"#800080", "#FFFF00", "#00FFFF", "#FFC0CB", "#5F9EA0", "#A52A2A", "#808080",
"#00FF00", "#000080", "#ADD8E6", "#E9967A", "#00FA9A", "#DA70D6", "#FFD700")) +
theme(axis.text = element_text(color = "black", size = 11, face = "bold"),
axis.title = element_text(color = "black", size = 14, face = "bold"),
legend.text = element_text(size = 12, face = "bold"), legend.title = element_text(size = 12))
dev.off()
## quartz_off_screen
## 2
pdf("FigS3.pdf", height = 12, width = 17)
ggplot(plot_taxa_long, aes(x = Samples, y = Proportion, fill = Phylum)) + geom_bar(stat = "identity") +
scale_fill_manual(values = c("#FF0000", "#0000FF", "#FFA500", "#556B2F",
"#800080", "#FFFF00", "#00FFFF", "#FFC0CB", "#5F9EA0", "#A52A2A", "#808080",
"#00FF00", "#000080", "#ADD8E6", "#E9967A", "#00FA9A", "#DA70D6", "#FFD700")) +
theme(axis.text = element_text(color = "black", size = 11, face = "bold"),
axis.title = element_text(color = "black", size = 14, face = "bold"),
legend.text = element_text(size = 12, face = "bold"), legend.title = element_text(size = 12))
dev.off()
## quartz_off_screen
## 2
Generate core file of OTUS. To include an OTU as a member of the core, a cut off of 80% (4/5 cows) was assigned for Holstein cows and 75% (3/4 cows) for Jersey cows.
mkdir cores
filter_otus_from_otu_table.py -i split_by_breed/dairybreeds.otu_table_rarefied__Breed_Holstein__.biom -o cores/core_Holstein.biom -s 4
filter_otus_from_otu_table.py -i split_by_breed/dairybreeds.otu_table_rarefied__Breed_HolsteinC__.biom -o cores/core_HolsteinC.biom -s 4
filter_otus_from_otu_table.py -i split_by_breed/dairybreeds.otu_table_rarefied__Breed_Jersey__.biom -o cores/core_Jersey.biom -s 3
merge_otu_tables.py -i cores/core_Holstein.biom,cores/core_HolsteinC.biom,cores/core_Jersey.biom -o cores/merged_cores.biom
biom convert -i cores/merged_cores.biom -o cores/merged_cores.txt --to-tsv
When cores files are merged, values are set to 0 for an OTU that is not a core in any other of the merged files. However, this does not mean that the OTU was not present but that it did not meet the cutoff criteria in a particular core file. Reads across OTUs have to be adjusted in the merged core file. From the adjusted core file, filter the OTU table into by breed and sampling method to generate respective beta diversity outputs. Additionally, from a file with all the data generate beta diversity outputs.
cores <- read.table("cores/merged_cores.txt", sep = "\t", header = F)
cores_sub <- cores[, 1]
write.table(cores_sub, file = "cores/core_otus.txt", col.names = F, row.names = F)
filter_otus_from_otu_table.py -i dairybreeds.otu_table_rarefied.biom -o final_core.biom --negate_ids_to_exclude -e cores/core_otus.txt
filter_samples_from_otu_table.py -i final_core.biom -o filter_breed.biom -m mappingfile.txt -s 'Breed:*,!HolsteinC'
filter_samples_from_otu_table.py -i final_core.biom -o filter_method.biom -m mappingfile.txt -s 'Breed:*,!Jersey'
beta_diversity_through_plots.py -i final_core.biom -o beta_div_all -t aligned_dairybreeds.otus2.phylip.tre -m mappingfile.txt -p qiime_parameters.txt
beta_diversity_through_plots.py -i filter_breed.biom -o beta_div_breed -t aligned_dairybreeds.otus2.phylip.tre -m mappingfile.txt -p qiime_parameters.txt
beta_diversity_through_plots.py -i filter_method.biom -o beta_div_method -m mappingfile.txt -t aligned_dairybreeds.otus2.phylip.tre -p qiime_parameters.txt
## /Users/henry/anaconda/envs/projectEnv/lib/python2.7/site-packages/numpy/core/fromnumeric.py:2645: VisibleDeprecationWarning: `rank` is deprecated; use the `ndim` attribute or function instead. To find the rank of a matrix see `numpy.linalg.matrix_rank`.
## VisibleDeprecationWarning)
## /Users/henry/anaconda/envs/projectEnv/lib/python2.7/site-packages/numpy/core/fromnumeric.py:2645: VisibleDeprecationWarning: `rank` is deprecated; use the `ndim` attribute or function instead. To find the rank of a matrix see `numpy.linalg.matrix_rank`.
## VisibleDeprecationWarning)
## /Users/henry/anaconda/envs/projectEnv/lib/python2.7/site-packages/numpy/core/fromnumeric.py:2645: VisibleDeprecationWarning: `rank` is deprecated; use the `ndim` attribute or function instead. To find the rank of a matrix see `numpy.linalg.matrix_rank`.
## VisibleDeprecationWarning)
Generate the principal coordinate analyses plots
# breed and sampling method
all_data <- read.table("beta_div_all/weighted_unifrac_pc.txt", skip = 9, nrows = 14,
sep = "\t")
all_pc <- all_data[, c("V2", "V3")]
samples_all <- c("H5", "H3", "J3", "H1", "J4", "H2", "H3C", "H4C", "H4", "H5C",
"J1", "H2C", "J2", "H1C")
breed_all <- c("Holstein", "Holstein", "Jersey", "Holstein", "Jersey", "Holstein",
"HolsteinC", "HolsteinC", "Holstein", "HolsteinC", "Jersey", "HolsteinC",
"Jersey", "HolsteinC")
pc_all <- data.frame(samples_all, all_pc, breed_all)
colnames(pc_all) <- c("SampleID", "PC1", "PC2", "Breed")
all_pc_plot <- ggplot(pc_all, aes(x = PC1, y = PC2, shape = Breed)) + geom_point(size = 3) +
labs(x = "PC1 (31.8%)", y = "PC2 (16.5%)") + ggtitle("Breed and Sampling Method") +
theme(title = element_text(color = "black", size = 14, face = "bold"), axis.text = element_text(color = "black",
size = 14, face = "bold"), axis.title = element_text(color = "black",
size = 14, face = "bold"), legend.text = element_text(size = 14, face = "bold"),
legend.title = element_blank()) + geom_text(aes(label = SampleID), size = 4,
vjust = -0.5, hjust = -0.3)
# breed
breed_data <- read.table("beta_div_breed/weighted_unifrac_pc.txt", skip = 9,
nrows = 9, sep = "\t")
breed_pc <- breed_data[, c("V2", "V3")]
samples <- c("H5", "H3", "J3", "H1", "J4", "H2", "H4", "J1", "J2")
breed <- c("Holstein", "Holstein", "Jersey", "Holstein", "Jersey", "Holstein",
"Holstein", "Jersey", "Jersey")
pc_breed <- data.frame(samples, breed_pc, breed)
colnames(pc_breed) <- c("SampleID", "PC1", "PC2", "Breed")
breed_pc_plot <- ggplot(pc_breed, aes(x = PC1, y = PC2, shape = Breed)) + geom_point(size = 3) +
labs(x = "PC1 (36.9%)", y = "PC2 (20.3%)") + ggtitle("Breed") + theme(title = element_text(color = "black",
size = 14, face = "bold"), axis.text = element_text(color = "black", size = 14,
face = "bold"), axis.title = element_text(color = "black", size = 14, face = "bold"),
legend.text = element_text(size = 14, face = "bold"), legend.title = element_blank()) +
geom_text(aes(label = SampleID), size = 4, vjust = -0.5, hjust = -0.3)
# sampling method
method_data <- read.table("beta_div_method/weighted_unifrac_pc.txt", skip = 9,
nrows = 10, sep = "\t")
method_pc <- method_data[, c("V2", "V3")]
samples_method <- c("H5", "H3", "H1", "H2", "H3C", "H4C", "H4", "H5C", "H2C",
"H1C")
method <- c("Tubing", "Tubing", "Tubing", "Tubing", "Cannula", "Cannula", "Tubing",
"Cannula", "Cannula", "Cannula")
pc_method <- data.frame(samples_method, method_pc, method)
colnames(pc_method) <- c("SampleID", "PC1", "PC2", "Method")
method_pc_plot <- ggplot(pc_method, aes(x = PC1, y = PC2, shape = Method)) +
geom_point(size = 3) + labs(x = "PC1 (32.4%)", y = "PC2 (24.3%)") + ggtitle("Sampling Method") +
theme(title = element_text(color = "black", size = 14, face = "bold"), axis.text = element_text(color = "black",
size = 14, face = "bold"), axis.title = element_text(color = "black",
size = 14, face = "bold"), legend.text = element_text(size = 14, face = "bold"),
legend.title = element_blank()) + geom_text(aes(label = SampleID), size = 4,
vjust = -0.5, hjust = -0.3)
png("Fig2.png", height = 8, width = 26, units = "in", res = 300)
multiplot(all_pc_plot, breed_pc_plot, method_pc_plot, cols = 3)
dev.off()
## quartz_off_screen
## 2
pdf("Fig2.pdf", height = 8, width = 26)
multiplot(all_pc_plot, breed_pc_plot, method_pc_plot, cols = 3)
dev.off()
## quartz_off_screen
## 2
Evaluate the effect of breed and method on microbial community composition using the permutational multivariate analysis of variance (permanova) test (implemented in R as adonis function in the vegan package) using the unweighted unifrac distance matrix
library(vegan)
## Loading required package: permute
## Loading required package: lattice
## This is vegan 2.3-4
# Mapping files
mappingfile <- read.table("mappingfile.txt", sep = "\t", header = F)
colnames(mappingfile) <- c("SampleID", "BarcodeSequence", "LinkerPrimerSequence",
"ReversePrimer", "Method", "Breed", "BCcount", "Well", "Description")
mappingcorrect <- mappingfile[-3, ]
breed_map <- subset(mappingcorrect, Breed != "HolsteinC")
method_map <- subset(mappingfile, Breed != "Jersey")
breed_weighted <- read.table("beta_div_breed/weighted_unifrac_dm.txt", sep = "\t",
header = TRUE)
# Order breed mapping file Sample IDs like matirx Sample IDs
breed_map = breed_map[match(breed_weighted$X, breed_map$SampleID), ]
row.names(breed_weighted) <- breed_weighted$X
breed_weighted <- breed_weighted[, -1]
breed_weighted <- as.dist(breed_weighted)
method_weighted <- read.table("beta_div_method/weighted_unifrac_dm.txt", sep = "\t",
header = TRUE)
# Order method mapping file Sample IDs like matirx Sample IDs
method_map = method_map[match(method_weighted$X, method_map$SampleID), ]
row.names(method_weighted) <- method_weighted$X
method_weighted <- method_weighted[, -1]
method_weighted <- as.dist(method_weighted)
all_weighted <- read.table("beta_div_all/weighted_unifrac_dm.txt", sep = "\t",
header = TRUE)
# Order method mapping file Sample IDs like matirx Sample IDs
mappingcorrect = mappingcorrect[match(all_weighted$X, mappingcorrect$SampleID),
]
row.names(all_weighted) <- all_weighted$X
all_weighted <- all_weighted[, -1]
all_weighted <- as.dist(all_weighted)
adonis(breed_weighted ~ Breed, permutations = 999, data = breed_map)
##
## Call:
## adonis(formula = breed_weighted ~ Breed, data = breed_map, permutations = 999)
##
## Permutation: free
## Number of permutations: 999
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## Breed 1 0.22239 0.222393 2.4705 0.26086 0.007 **
## Residuals 7 0.63015 0.090022 0.73914
## Total 8 0.85254 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
adonis(method_weighted ~ Method, permutations = 999, data = method_map)
##
## Call:
## adonis(formula = method_weighted ~ Method, data = method_map, permutations = 999)
##
## Permutation: free
## Number of permutations: 999
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## Method 1 0.05418 0.054183 0.80592 0.09152 0.634
## Residuals 8 0.53785 0.067231 0.90848
## Total 9 0.59203 1.00000
adonis(all_weighted ~ Breed, permutations = 999, data = mappingcorrect)
##
## Call:
## adonis(formula = all_weighted ~ Breed, data = mappingcorrect, permutations = 999)
##
## Permutation: free
## Number of permutations: 999
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## Breed 2 0.30681 0.153405 1.9643 0.26316 0.013 *
## Residuals 11 0.85905 0.078095 0.73684
## Total 13 1.16586 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
HOMOVA
wget https://raw.githubusercontent.com/enriquepaz/2016_Paz_et_al_Dairy_Breeds/master/breed.dist.txt
wget https://raw.githubusercontent.com/enriquepaz/2016_Paz_et_al_Dairy_Breeds/master/breed.design.txt
wget https://raw.githubusercontent.com/enriquepaz/2016_Paz_et_al_Dairy_Breeds/master/method.dist.txt
wget https://raw.githubusercontent.com/enriquepaz/2016_Paz_et_al_Dairy_Breeds/master/method.design.txt
## --2016-03-02 06:26:02-- https://raw.githubusercontent.com/enriquepaz/2016_Paz_et_al_Dairy_Breeds/master/breed.dist.txt
## Resolving raw.githubusercontent.com... 23.235.44.133
## Connecting to raw.githubusercontent.com|23.235.44.133|:443... connected.
## HTTP request sent, awaiting response... 200 OK
## Length: 923 [text/plain]
## Saving to: 'breed.dist.txt'
##
## 0K 100% 73.4M=0s
##
## 2016-03-02 06:26:02 (73.4 MB/s) - 'breed.dist.txt' saved [923/923]
##
## --2016-03-02 06:26:02-- https://raw.githubusercontent.com/enriquepaz/2016_Paz_et_al_Dairy_Breeds/master/breed.design.txt
## Resolving raw.githubusercontent.com... 23.235.44.133
## Connecting to raw.githubusercontent.com|23.235.44.133|:443... connected.
## HTTP request sent, awaiting response... 200 OK
## Length: 107 [text/plain]
## Saving to: 'breed.design.txt'
##
## 0K 100% 9.28M=0s
##
## 2016-03-02 06:26:02 (9.28 MB/s) - 'breed.design.txt' saved [107/107]
##
## --2016-03-02 06:26:02-- https://raw.githubusercontent.com/enriquepaz/2016_Paz_et_al_Dairy_Breeds/master/method.dist.txt
## Resolving raw.githubusercontent.com... 23.235.44.133
## Connecting to raw.githubusercontent.com|23.235.44.133|:443... connected.
## HTTP request sent, awaiting response... 200 OK
## Length: 1149 (1.1K) [text/plain]
## Saving to: 'method.dist.txt'
##
## 0K . 100% 57.7M=0s
##
## 2016-03-02 06:26:03 (57.7 MB/s) - 'method.dist.txt' saved [1149/1149]
##
## --2016-03-02 06:26:03-- https://raw.githubusercontent.com/enriquepaz/2016_Paz_et_al_Dairy_Breeds/master/method.design.txt
## Resolving raw.githubusercontent.com... 23.235.44.133
## Connecting to raw.githubusercontent.com|23.235.44.133|:443... connected.
## HTTP request sent, awaiting response... 200 OK
## Length: 119 [text/plain]
## Saving to: 'method.design.txt'
##
## 0K 100% 10.3M=0s
##
## 2016-03-02 06:26:03 (10.3 MB/s) - 'method.design.txt' saved [119/119]
mothur "#homova(phylip=breed.dist.txt, design=breed.design.txt)"
mothur "#homova(phylip=method.dist.txt, design=method.design.txt)"
## [H[2J
##
##
##
##
##
## mothur v.1.36.1
## Last updated: 7/27/2015
##
## by
## Patrick D. Schloss
##
## Department of Microbiology & Immunology
## University of Michigan
## pschloss@umich.edu
## http://www.mothur.org
##
## When using, please cite:
## Schloss, P.D., et al., Introducing mothur: Open-source, platform-independent, community-supported software for describing and comparing microbial communities. Appl Environ Microbiol, 2009. 75(23):7537-41.
##
## Distributed under the GNU General Public License
##
## Type 'help()' for information on the commands that are available
##
## Type 'quit()' to exit program
##
##
##
## mothur > homova(phylip=breed.dist.txt, design=breed.design.txt)
## HOMOVA BValue P-value SSwithin/(Ni-1)_values
## Holstein-Jersey 0.0805971 0.254 0.0772381 0.107066
## Experiment-wise error rate: 0.05
## If you have borderline P-values, you should try increasing the number of iterations
##
## Output File Names:
## breed.dist.homova
##
##
## mothur > quit()
## [H[2J
##
##
##
##
##
## mothur v.1.36.1
## Last updated: 7/27/2015
##
## by
## Patrick D. Schloss
##
## Department of Microbiology & Immunology
## University of Michigan
## pschloss@umich.edu
## http://www.mothur.org
##
## When using, please cite:
## Schloss, P.D., et al., Introducing mothur: Open-source, platform-independent, community-supported software for describing and comparing microbial communities. Appl Environ Microbiol, 2009. 75(23):7537-41.
##
## Distributed under the GNU General Public License
##
## Type 'help()' for information on the commands that are available
##
## Type 'quit()' to exit program
##
##
##
## mothur > homova(phylip=method.dist.txt, design=method.design.txt)
## HOMOVA BValue P-value SSwithin/(Ni-1)_values
## cannula-tubing 0.0796527 0.285 0.0572247 0.0772381
## Experiment-wise error rate: 0.05
## If you have borderline P-values, you should try increasing the number of iterations
##
## Output File Names:
## method.dist.homova
##
##
## mothur > quit()
Make heat map of all core OTUs
mkdir heatmap
biom convert -i final_core.biom -o heatmap/final_core_json.biom --to-json --table-type="OTU table"
library(Heatplus)
library(RColorBrewer)
otu_core <- read_biom("heatmap/final_core_json.biom")
core_table <- as.data.frame(as(biom_data(otu_core), "matrix"))
colnames(core_table) <- c("Holstein5", "Holstein3", "Jersey3", "Holstein1",
"Jersey4", "Holstein2", "Holstein3C", "Holstein4C", "Holstein4", "Holstein5C",
"Jersey1", "Holstein2C", "Jersey2", "Holstein1C")
core_table_trans <- as.data.frame(t(core_table))
data_propor <- core_table_trans/rowSums(core_table_trans)
scalewhiteblack <- colorRampPalette(c("white", "black"), space = "rgb")(100)
maxab <- apply(data_propor, 2, max)
n1 <- names(which(maxab < 0.01))
data_abun <- data_propor[, -which(names(data_propor) %in% n1)]
data.dist <- vegdist(data_propor, method = "bray")
row.clus <- hclust(data.dist, "aver")
data.dist.g <- vegdist(t(data_abun), method = "bray")
col.clus <- hclust(data.dist.g, "aver")
png("FigS5.png", height = 8, width = 8, units = "in", res = 300)
heatmap.2(as.matrix(data_abun), Rowv = as.dendrogram(row.clus), Colv = as.dendrogram(col.clus),
col = scalewhiteblack, margins = c(2, 8), trace = "none", density.info = "none",
labCol = "", xlab = "OTUs", ylab = "Samples", lhei = c(2, 8))
dev.off()
## quartz_off_screen
## 2
pdf("FigS5.pdf", height = 8, width = 8)
heatmap.2(as.matrix(data_abun), Rowv = as.dendrogram(row.clus), Colv = as.dendrogram(col.clus),
col = scalewhiteblack, margins = c(2, 8), trace = "none", density.info = "none",
labCol = "", xlab = "OTUs", ylab = "Samples", lhei = c(2, 8))
dev.off()
## quartz_off_screen
## 2
LEfSe was used to determine different OTUs between breeds and sampling methods
biom convert -i filter_breed.biom -o filter_breed_json.biom --to-json --table-type="OTU table"
biom convert -i filter_method.biom -o filter_method_json.biom --to-json --table-type="OTU table"
Create files with LEfSe format
breed_biom <- read_biom("filter_breed_json.biom")
breed_table <- as.data.frame(as(biom_data(breed_biom), "matrix"))
breed_rel <- sweep(breed_table, 2, colSums(breed_table), FUN = "/")
rownames(breed_rel) <- paste("OTU", rownames(breed_rel), sep = "")
breed_rel2 <- breed_rel[0, ]
breed_rel2[nrow(breed_rel2) + 1, ] <- c("Holstein", "Holstein", "Jersey", "Holstein",
"Jersey", "Holstein", "Holstein", "Jersey", "Jersey")
breed_rel2[nrow(breed_rel2) + 1, ] <- c("Holstein5", "Holstein3", "Jersey3",
"Holstein1", "Jersey4", "Holstein2", "Holstein4", "Jersey1", "Jersey2")
row.names(breed_rel2) <- c("breed", "ID")
breed_abundance <- rbind(breed_rel2, breed_rel)
write.table(breed_abundance, sep = "\t", file = "breed_abundance.txt", row.names = TRUE,
col.names = FALSE, quote = FALSE)
method_biom <- read_biom("filter_method_json.biom")
method_table <- as.data.frame(as(biom_data(method_biom), "matrix"))
method_rel <- sweep(method_table, 2, colSums(method_table), FUN = "/")
rownames(method_rel) <- paste("OTU", rownames(method_rel), sep = "")
method_rel2 <- method_rel[0, ]
method_rel2[nrow(method_rel2) + 1, ] <- c("Tubing", "Tubing", "Tubing", "Tubing",
"Cannula", "Cannula", "Tubing", "Cannula", "Cannula", "Cannula")
method_rel2[nrow(method_rel2) + 1, ] <- c("Holstein5", "Holstein3", "Holstein1",
"Holstein2", "Holstein3C", "Holstein4C", "Holstein4", "Holstein5C", "Holstein2C",
"Holstein1C")
row.names(method_rel2) <- c("method", "ID")
method_abundance <- rbind(method_rel2, method_rel)
write.table(method_abundance, sep = "\t", file = "method_abundance.txt", row.names = TRUE,
col.names = FALSE, quote = FALSE)
LEfSe was used to determine OTUs that differ
wget https://bitbucket.org/nsegata/lefse/get/default.zip -O lefse.zip
unzip lefse.zip
mv nsegata* lefse
python lefse/format_input.py breed_abundance.txt breed_lefse_format.txt -c 1 -u 2 -o 1000000
python lefse/run_lefse.py breed_lefse_format.txt breed_lefse_result.txt
python lefse/format_input.py method_abundance.txt method_lefse_format.txt -c 1 -u 2 -o 1000000
python lefse/run_lefse.py method_lefse_format.txt method_lefse_result.txt
## --2016-03-02 06:26:07-- https://bitbucket.org/nsegata/lefse/get/default.zip
## Resolving bitbucket.org... 104.192.143.2, 104.192.143.3, 104.192.143.1
## Connecting to bitbucket.org|104.192.143.2|:443... connected.
## HTTP request sent, awaiting response... 200 OK
## Length: 60035 (59K) [application/zip]
## Saving to: 'lefse.zip'
##
## 0K .......... .......... .......... .......... .......... 85% 335K 0s
## 50K ........ 100% 2.06M=0.2s
##
## 2016-03-02 06:26:08 (382 KB/s) - 'lefse.zip' saved [60035/60035]
##
## Archive: lefse.zip
## inflating: nsegata-lefse-e3cabe93a0d1/.hg_archival.txt
## inflating: nsegata-lefse-e3cabe93a0d1/.hgtags
## inflating: nsegata-lefse-e3cabe93a0d1/__init__.py
## inflating: nsegata-lefse-e3cabe93a0d1/example/run.sh
## inflating: nsegata-lefse-e3cabe93a0d1/format_input.py
## inflating: nsegata-lefse-e3cabe93a0d1/lefse.py
## inflating: nsegata-lefse-e3cabe93a0d1/lefse2circlader.py
## inflating: nsegata-lefse-e3cabe93a0d1/lefsebiom/AbundanceTable.py
## inflating: nsegata-lefse-e3cabe93a0d1/lefsebiom/CClade.py
## inflating: nsegata-lefse-e3cabe93a0d1/lefsebiom/ConstantsBreadCrumbs.py
## inflating: nsegata-lefse-e3cabe93a0d1/lefsebiom/ValidateData.py
## inflating: nsegata-lefse-e3cabe93a0d1/lefsebiom/__init__.py
## inflating: nsegata-lefse-e3cabe93a0d1/plot_cladogram.py
## inflating: nsegata-lefse-e3cabe93a0d1/plot_features.py
## inflating: nsegata-lefse-e3cabe93a0d1/plot_res.py
## inflating: nsegata-lefse-e3cabe93a0d1/qiime2lefse.py
## inflating: nsegata-lefse-e3cabe93a0d1/requirements.txt
## inflating: nsegata-lefse-e3cabe93a0d1/run_lefse.py
## Number of significantly discriminative features: 212 ( 212 ) before internal wilcoxon
## Number of discriminative features with abs LDA score > 2.0 : 181
## Number of significantly discriminative features: 62 ( 62 ) before internal wilcoxon
## Number of discriminative features with abs LDA score > 2.0 : 48
breed_lefse <- read.table("breed_lefse_result.txt", header = FALSE, sep = "\t")
breed_lefse <- breed_lefse[complete.cases(breed_lefse), ]
breed_lefse$V1 <- gsub("OTU", "", breed_lefse$V1)
write.table(breed_lefse$V1, file = "breed_lefse_otus.txt", row.names = FALSE,
col.names = FALSE, quote = FALSE)
method_lefse <- read.table("method_lefse_result.txt", header = FALSE, sep = "\t")
method_lefse <- method_lefse[complete.cases(method_lefse), ]
method_lefse$V1 <- gsub("OTU", "", method_lefse$V1)
write.table(method_lefse$V1, file = "method_lefse_otus.txt", row.names = FALSE,
col.names = FALSE, quote = FALSE)
Create input files to generate heat maps
biom convert -i filter_breed.biom -o heatmap/filter_breed.txt --to-tsv --header-key taxonomy
biom convert -i filter_method.biom -o heatmap/filter_method.txt --to-tsv --header-key taxonomy
awk '{gsub("; ","\t",$0); print;}' heatmap/filter_breed.txt | awk '{gsub("#OTU","OTU",$0); print;}' | cut -f-10,15 | tail -n +2 | awk '{if(NR==1){print $0,"\ttaxonomy"}else{print }}' > heatmap/breed_tax.txt
awk '{gsub("; ","\t",$0); print;}' heatmap/filter_method.txt | awk '{gsub("#OTU","OTU",$0); print;}' | cut -f-11,16 | tail -n +2 | awk '{if(NR==1){print $0,"\ttaxonomy"}else{print }}' > heatmap/method_tax.txt
# breed
breed_otu <- read.table("heatmap/breed_tax.txt", header = T, sep = "\t", fill = TRUE)
breed_otu$taxonomy <- sub("f__", "", breed_otu$taxonomy)
breed_otu$taxonomy <- sub("\\]", "", breed_otu$taxonomy)
breed_otu$taxonomy <- sub("\\[", "", breed_otu$taxonomy)
breed_otu$taxonomy <- sub("^$", "No Assigned Family", breed_otu$taxonomy)
colnames(breed_otu) <- c("OTUs", "Holstein5", "Holstein3", "Jersey3", "Holstein1",
"Jersey4", "Holstein2", "Holstein4", "Jersey1", "Jersey2", "taxonomy")
OTU.ID <- breed_otu[, 1]
taxonomy <- breed_otu[, 11]
breed_samples <- breed_otu[, -11]
row.names(breed_samples) <- breed_samples$OTUs
breed_samples <- breed_samples[, -1]
breed_trans <- as.data.frame(t(breed_samples))
breed_propor <- breed_trans/rowSums(breed_trans)
breed_propor_trans <- as.data.frame(t(breed_propor))
breed_proportion <- data.frame(OTU.ID, breed_propor_trans, taxonomy)
write.table(breed_proportion, file = "heatmap/breed_propor.txt", sep = "\t",
row.names = F, col.names = T, quote = F)
# method
method_otu <- read.table("heatmap/method_tax.txt", header = T, sep = "\t", fill = TRUE)
method_otu$taxonomy <- sub("f__", "", method_otu$taxonomy)
method_otu$taxonomy <- sub("\\]", "", method_otu$taxonomy)
method_otu$taxonomy <- sub("\\[", "", method_otu$taxonomy)
method_otu$taxonomy <- sub("^$", "No Assigned Family", method_otu$taxonomy)
colnames(method_otu) <- c("OTUs", "Holstein5", "Holstein3", "Holstein1", "Holstein2",
"Holstein3C", "Holstein4C", "Holstein4", "Holstein5C", "Holstein2C", "Holstein1C",
"taxonomy")
OTU.ID <- method_otu[, 1]
taxonomy <- method_otu[, 12]
method_samples <- method_otu[, -12]
row.names(method_samples) <- method_samples$OTUs
method_samples <- method_samples[, -1]
method_trans <- as.data.frame(t(method_samples))
method_propor <- method_trans/rowSums(method_trans)
method_propor_trans <- as.data.frame(t(method_propor))
method_proportion <- data.frame(OTU.ID, method_propor_trans, taxonomy)
write.table(method_proportion, file = "heatmap/method_propor.txt", sep = "\t",
row.names = F, col.names = T, quote = F)
awk '{gsub("OTU.ID","#OTU ID",$0); print;}' heatmap/breed_propor.txt> heatmap/breed_propor_final.txt
awk '{gsub("OTU.ID","#OTU ID",$0); print;}' heatmap/method_propor.txt> heatmap/method_propor_final.txt
biom convert -i heatmap/breed_propor_final.txt -o heatmap/breed_propor_final.biom --to-json --table-type="OTU table" --process-obs-metadata taxonomy
biom convert -i heatmap/method_propor_final.txt -o heatmap/method_propor_final.biom --to-json --table-type="OTU table" --process-obs-metadata taxonomy
filter_otus_from_otu_table.py -i heatmap/breed_propor_final.biom -o heatmap/breed_lefse.biom -e breed_lefse_otus.txt --negate_ids_to_exclude
filter_otus_from_otu_table.py -i heatmap/method_propor_final.biom -o heatmap/method_lefse.biom -e method_lefse_otus.txt --negate_ids_to_exclude
biom convert -i heatmap/breed_lefse.biom -o heatmap/breed_lefse.txt --to-tsv --header-key taxonomy
biom convert -i heatmap/method_lefse.biom -o heatmap/method_lefse.txt --to-tsv --header-key taxonomy
awk '{gsub("#OTU ID","OTUs",$0); print;}' heatmap/breed_lefse.txt> heatmap/breed_lefse_final.txt
awk '{gsub("#OTU ID","OTUs",$0); print;}' heatmap/method_lefse.txt> heatmap/method_lefse_final.txt
Make heat maps
# breed all
breed_lefse_all <- read.table("heatmap/breed_lefse_final.txt", header = T, sep = "\t")
breed_lefse_data <- breed_lefse_all[, -11]
row.names(breed_lefse_data) <- breed_lefse_data$OTUs
breed_lefse_data <- breed_lefse_data[, -1]
breed_all_trans <- as.data.frame(t(breed_lefse_data))
scalewhiteblack <- colorRampPalette(c("white", "black"), space = "rgb")(100)
data.dist <- vegdist(breed_all_trans, method = "bray")
row.clus <- hclust(data.dist, "aver")
data.dist.g <- vegdist(t(breed_all_trans), method = "bray")
col.clus <- hclust(data.dist.g, "aver")
png("Fig3a.png", height = 8, width = 8, units = "in", res = 300)
heatmap.2(as.matrix(breed_all_trans), Rowv = as.dendrogram(row.clus), Colv = as.dendrogram(col.clus),
col = scalewhiteblack, margins = c(2, 8), trace = "none", density.info = "none",
labCol = "", xlab = "OTUs", ylab = "Samples", lhei = c(2, 8))
dev.off()
## quartz_off_screen
## 2
pdf("Fig3a.pdf", height = 8, width = 8)
heatmap.2(as.matrix(breed_all_trans), Rowv = as.dendrogram(row.clus), Colv = as.dendrogram(col.clus),
col = scalewhiteblack, margins = c(2, 8), trace = "none", density.info = "none",
labCol = "", xlab = "OTUs", ylab = "Samples", lhei = c(2, 8))
dev.off()
## quartz_off_screen
## 2
# breed most abundant
breed_lefse <- read.table("heatmap/breed_lefse_final.txt", header = T, sep = "\t")
row.names(breed_lefse) <- breed_lefse$OTUs
breed_lefse <- breed_lefse[, -1]
breed_lefse_tax <- subset(breed_lefse, select = c(taxonomy))
breed_lefse_samples <- breed_lefse[, -10]
breed_lefse_trans <- as.data.frame(t(breed_lefse_samples))
scalewhiteblack <- colorRampPalette(c("white", "black"), space = "rgb")(100)
maxab <- apply(breed_lefse_trans, 2, max)
n1 <- names(which(maxab < 0.01))
breed_abun <- breed_lefse_trans[, -which(names(breed_lefse_trans) %in% n1)]
data.dist <- vegdist(breed_lefse_trans, method = "bray")
row.clus <- hclust(data.dist, "aver")
breeds_abun_dist <- vegdist(t(breed_abun), method = "bray")
col.clus <- hclust(breeds_abun_dist, "aver")
breed_abun_trans <- as.data.frame(t(breed_abun))
merge_data <- merge(breed_abun_trans, breed_lefse_tax, by = "row.names")
row.names(merge_data) <- merge_data$Row.names
merge_data <- merge_data[, -1]
breed_abun_fam <- subset(merge_data, select = c(taxonomy))
png("Fig3b.png", height = 8, width = 8, units = "in", res = 300)
heatmap.2(as.matrix(breed_abun), Rowv = as.dendrogram(row.clus), Colv = as.dendrogram(col.clus),
col = scalewhiteblack, margins = c(10, 7), trace = "none", density.info = "none",
labCol = breed_abun_fam$taxonomy, xlab = "Family", ylab = "Samples", lhei = c(2,
8))
dev.off()
## quartz_off_screen
## 2
pdf("Fig3b.pdf", height = 8, width = 8)
heatmap.2(as.matrix(breed_abun), Rowv = as.dendrogram(row.clus), Colv = as.dendrogram(col.clus),
col = scalewhiteblack, margins = c(10, 7), trace = "none", density.info = "none",
labCol = breed_abun_fam$taxonomy, xlab = "Family", ylab = "Samples", lhei = c(2,
8))
dev.off()
## quartz_off_screen
## 2